Models: All models related to FP
[thoth.git] / models / failure_prediction / jnotebooks / FeatureCreation.ipynb
1 {
2   "nbformat": 4,
3   "nbformat_minor": 0,
4   "metadata": {
5     "colab": {
6       "name": "FeatureCreation.ipynb",
7       "provenance": []
8     },
9     "kernelspec": {
10       "name": "python3",
11       "display_name": "Python 3"
12     },
13     "language_info": {
14       "name": "python"
15     }
16   },
17   "cells": [
18     {
19       "cell_type": "markdown",
20       "metadata": {
21         "id": "zyycU3DFlecK"
22       },
23       "source": [
24         "Contributors: **Rohit Singh Rathaur, Girish L.** \n",
25         "\n",
26         "Copyright 2021 *Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka*\n",
27         "\n",
28         "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
29         "you may not use this file except in compliance with the License.\n",
30         "You may obtain a copy of the License at\n",
31         "\n",
32         "    http://www.apache.org/licenses/LICENSE-2.0\n",
33         "\n",
34         "Unless required by applicable law or agreed to in writing, software\n",
35         "distributed under the License is distributed on an \"AS IS\" BASIS,\n",
36         "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
37         "See the License for the specific language governing permissions and\n",
38         "limitations under the License."
39       ]
40     },
41     {
42       "cell_type": "code",
43       "metadata": {
44         "id": "gehKp2rySVf8"
45       },
46       "source": [
47         "# Import libraries used for visualization and analysis\n",
48         "import pandas as pd\n",
49         "import numpy as np\n",
50         "\n",
51         "%matplotlib inline\n",
52         "import matplotlib\n",
53         "import matplotlib.pyplot as plt\n",
54         "\n",
55         "from pandas import Series,DataFrame\n",
56         "import matplotlib.pyplot as plt\n",
57         "import seaborn as sns\n",
58         "from sklearn.preprocessing import scale\n",
59         "from sklearn.decomposition import PCA\n",
60         "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
61         "from scipy import stats\n",
62         "from IPython.display import display, HTML"
63       ],
64       "execution_count": null,
65       "outputs": []
66     },
67     {
68       "cell_type": "code",
69       "metadata": {
70         "colab": {
71           "base_uri": "https://localhost:8080/"
72         },
73         "id": "tkuBlbCXSsdP",
74         "outputId": "2b3ef633-a851-4c53-80eb-6b1bf4ffcc1c"
75       },
76       "source": [
77         "from google.colab import drive\n",
78         "drive.mount('/gdrive')"
79       ],
80       "execution_count": null,
81       "outputs": [
82         {
83           "output_type": "stream",
84           "text": [
85             "Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount(\"/gdrive\", force_remount=True).\n"
86           ],
87           "name": "stdout"
88         }
89       ]
90     },
91     {
92       "cell_type": "markdown",
93       "metadata": {
94         "id": "wZXe8D88S-6R"
95       },
96       "source": [
97         "# **Loading the Data**"
98       ]
99     },
100     {
101       "cell_type": "code",
102       "metadata": {
103         "id": "KiDSpl37Sy39"
104       },
105       "source": [
106         "df_Ellis  = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Final.csv\")\n",
107         "#df_Bono  = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Bono.csv\", error_bad_lines=False)\n",
108         "#df_Sprout  = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Sprout.csv\", error_bad_lines=False)\n",
109         "#df_Homer  = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Homer.csv\", error_bad_lines=False)\n",
110         "#df_Homestead  = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Homestead.csv\", error_bad_lines=False)\n",
111         "#df_Ralf  = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Ralf.csv\", error_bad_lines=False)"
112       ],
113       "execution_count": null,
114       "outputs": []
115     },
116     {
117       "cell_type": "code",
118       "metadata": {
119         "colab": {
120           "base_uri": "https://localhost:8080/",
121           "height": 204
122         },
123         "id": "dpy8jAm-TsCs",
124         "outputId": "d8ad2072-1fa3-4b3c-fb55-b5128767b349"
125       },
126       "source": [
127         "df_Ellis.head()"
128       ],
129       "execution_count": null,
130       "outputs": [
131         {
132           "output_type": "execute_result",
133           "data": {
134             "text/html": [
135               "<div>\n",
136               "<style scoped>\n",
137               "    .dataframe tbody tr th:only-of-type {\n",
138               "        vertical-align: middle;\n",
139               "    }\n",
140               "\n",
141               "    .dataframe tbody tr th {\n",
142               "        vertical-align: top;\n",
143               "    }\n",
144               "\n",
145               "    .dataframe thead th {\n",
146               "        text-align: right;\n",
147               "    }\n",
148               "</style>\n",
149               "<table border=\"1\" class=\"dataframe\">\n",
150               "  <thead>\n",
151               "    <tr style=\"text-align: right;\">\n",
152               "      <th></th>\n",
153               "      <th>Timestamp</th>\n",
154               "      <th>ellis-cpu.system_perc</th>\n",
155               "      <th>ellis-cpu.wait_perc</th>\n",
156               "      <th>ellis-load.avg_1_min</th>\n",
157               "      <th>ellis-mem.free_mb</th>\n",
158               "      <th>ellis-net.in_bytes_sec</th>\n",
159               "      <th>ellis-net.out_packets_sec</th>\n",
160               "    </tr>\n",
161               "  </thead>\n",
162               "  <tbody>\n",
163               "    <tr>\n",
164               "      <th>0</th>\n",
165               "      <td>14/09/2016 0:00</td>\n",
166               "      <td>0.5</td>\n",
167               "      <td>12.9</td>\n",
168               "      <td>1.73</td>\n",
169               "      <td>3949</td>\n",
170               "      <td>5413.200</td>\n",
171               "      <td>62.067</td>\n",
172               "    </tr>\n",
173               "    <tr>\n",
174               "      <th>1</th>\n",
175               "      <td>14/09/2016 0:00</td>\n",
176               "      <td>0.4</td>\n",
177               "      <td>10.3</td>\n",
178               "      <td>1.79</td>\n",
179               "      <td>3950</td>\n",
180               "      <td>5201.667</td>\n",
181               "      <td>59.567</td>\n",
182               "    </tr>\n",
183               "    <tr>\n",
184               "      <th>2</th>\n",
185               "      <td>14/09/2016 0:01</td>\n",
186               "      <td>0.4</td>\n",
187               "      <td>11.8</td>\n",
188               "      <td>1.52</td>\n",
189               "      <td>3950</td>\n",
190               "      <td>5370.733</td>\n",
191               "      <td>61.200</td>\n",
192               "    </tr>\n",
193               "    <tr>\n",
194               "      <th>3</th>\n",
195               "      <td>14/09/2016 0:01</td>\n",
196               "      <td>0.4</td>\n",
197               "      <td>12.9</td>\n",
198               "      <td>1.43</td>\n",
199               "      <td>3949</td>\n",
200               "      <td>5292.467</td>\n",
201               "      <td>60.400</td>\n",
202               "    </tr>\n",
203               "    <tr>\n",
204               "      <th>4</th>\n",
205               "      <td>14/09/2016 0:02</td>\n",
206               "      <td>0.5</td>\n",
207               "      <td>12.1</td>\n",
208               "      <td>1.44</td>\n",
209               "      <td>3950</td>\n",
210               "      <td>5318.167</td>\n",
211               "      <td>61.700</td>\n",
212               "    </tr>\n",
213               "  </tbody>\n",
214               "</table>\n",
215               "</div>"
216             ],
217             "text/plain": [
218               "         Timestamp  ...  ellis-net.out_packets_sec\n",
219               "0  14/09/2016 0:00  ...                     62.067\n",
220               "1  14/09/2016 0:00  ...                     59.567\n",
221               "2  14/09/2016 0:01  ...                     61.200\n",
222               "3  14/09/2016 0:01  ...                     60.400\n",
223               "4  14/09/2016 0:02  ...                     61.700\n",
224               "\n",
225               "[5 rows x 7 columns]"
226             ]
227           },
228           "metadata": {
229             "tags": []
230           },
231           "execution_count": 264
232         }
233       ]
234     },
235     {
236       "cell_type": "code",
237       "metadata": {
238         "colab": {
239           "base_uri": "https://localhost:8080/",
240           "height": 297
241         },
242         "id": "dJa9FgJNgqpI",
243         "outputId": "54d6c43d-489f-4347-93e5-12e4a4da2066"
244       },
245       "source": [
246         "df_Ellis.describe()"
247       ],
248       "execution_count": null,
249       "outputs": [
250         {
251           "output_type": "execute_result",
252           "data": {
253             "text/html": [
254               "<div>\n",
255               "<style scoped>\n",
256               "    .dataframe tbody tr th:only-of-type {\n",
257               "        vertical-align: middle;\n",
258               "    }\n",
259               "\n",
260               "    .dataframe tbody tr th {\n",
261               "        vertical-align: top;\n",
262               "    }\n",
263               "\n",
264               "    .dataframe thead th {\n",
265               "        text-align: right;\n",
266               "    }\n",
267               "</style>\n",
268               "<table border=\"1\" class=\"dataframe\">\n",
269               "  <thead>\n",
270               "    <tr style=\"text-align: right;\">\n",
271               "      <th></th>\n",
272               "      <th>ellis-cpu.system_perc</th>\n",
273               "      <th>ellis-cpu.wait_perc</th>\n",
274               "      <th>ellis-load.avg_1_min</th>\n",
275               "      <th>ellis-mem.free_mb</th>\n",
276               "      <th>ellis-net.in_bytes_sec</th>\n",
277               "      <th>ellis-net.out_packets_sec</th>\n",
278               "    </tr>\n",
279               "  </thead>\n",
280               "  <tbody>\n",
281               "    <tr>\n",
282               "      <th>count</th>\n",
283               "      <td>177000.000000</td>\n",
284               "      <td>177000.000000</td>\n",
285               "      <td>177000.000000</td>\n",
286               "      <td>177000.000000</td>\n",
287               "      <td>1.770000e+05</td>\n",
288               "      <td>177000.000000</td>\n",
289               "    </tr>\n",
290               "    <tr>\n",
291               "      <th>mean</th>\n",
292               "      <td>2.315540</td>\n",
293               "      <td>1.024163</td>\n",
294               "      <td>0.198842</td>\n",
295               "      <td>4206.847232</td>\n",
296               "      <td>1.855987e+07</td>\n",
297               "      <td>1336.694851</td>\n",
298               "    </tr>\n",
299               "    <tr>\n",
300               "      <th>std</th>\n",
301               "      <td>1.170977</td>\n",
302               "      <td>3.127178</td>\n",
303               "      <td>0.262227</td>\n",
304               "      <td>173.364297</td>\n",
305               "      <td>5.612164e+06</td>\n",
306               "      <td>2220.146124</td>\n",
307               "    </tr>\n",
308               "    <tr>\n",
309               "      <th>min</th>\n",
310               "      <td>0.100000</td>\n",
311               "      <td>0.000000</td>\n",
312               "      <td>0.000000</td>\n",
313               "      <td>2320.000000</td>\n",
314               "      <td>0.000000e+00</td>\n",
315               "      <td>0.000000</td>\n",
316               "    </tr>\n",
317               "    <tr>\n",
318               "      <th>25%</th>\n",
319               "      <td>1.500000</td>\n",
320               "      <td>0.200000</td>\n",
321               "      <td>0.095000</td>\n",
322               "      <td>4095.000000</td>\n",
323               "      <td>1.797602e+07</td>\n",
324               "      <td>182.033000</td>\n",
325               "    </tr>\n",
326               "    <tr>\n",
327               "      <th>50%</th>\n",
328               "      <td>1.700000</td>\n",
329               "      <td>0.200000</td>\n",
330               "      <td>0.140000</td>\n",
331               "      <td>4214.000000</td>\n",
332               "      <td>2.087674e+07</td>\n",
333               "      <td>200.067000</td>\n",
334               "    </tr>\n",
335               "    <tr>\n",
336               "      <th>75%</th>\n",
337               "      <td>3.500000</td>\n",
338               "      <td>0.400000</td>\n",
339               "      <td>0.198000</td>\n",
340               "      <td>4331.000000</td>\n",
341               "      <td>2.160859e+07</td>\n",
342               "      <td>1069.667000</td>\n",
343               "    </tr>\n",
344               "    <tr>\n",
345               "      <th>max</th>\n",
346               "      <td>16.700000</td>\n",
347               "      <td>22.400000</td>\n",
348               "      <td>2.580000</td>\n",
349               "      <td>4633.000000</td>\n",
350               "      <td>2.339041e+07</td>\n",
351               "      <td>7887.552000</td>\n",
352               "    </tr>\n",
353               "  </tbody>\n",
354               "</table>\n",
355               "</div>"
356             ],
357             "text/plain": [
358               "       ellis-cpu.system_perc  ...  ellis-net.out_packets_sec\n",
359               "count          177000.000000  ...              177000.000000\n",
360               "mean                2.315540  ...                1336.694851\n",
361               "std                 1.170977  ...                2220.146124\n",
362               "min                 0.100000  ...                   0.000000\n",
363               "25%                 1.500000  ...                 182.033000\n",
364               "50%                 1.700000  ...                 200.067000\n",
365               "75%                 3.500000  ...                1069.667000\n",
366               "max                16.700000  ...                7887.552000\n",
367               "\n",
368               "[8 rows x 6 columns]"
369             ]
370           },
371           "metadata": {
372             "tags": []
373           },
374           "execution_count": 265
375         }
376       ]
377     },
378     {
379       "cell_type": "code",
380       "metadata": {
381         "id": "xGVleQbnhRm6"
382       },
383       "source": [
384         "#df_Ellis['SLO1'] = 0\n",
385         "#print('Column names are: ',list(df_Ellis.columns))"
386       ],
387       "execution_count": null,
388       "outputs": []
389     },
390     {
391       "cell_type": "code",
392       "metadata": {
393         "colab": {
394           "base_uri": "https://localhost:8080/"
395         },
396         "id": "b-F_gA61xowR",
397         "outputId": "f9bd6232-2603-40ad-ccff-18887839e2da"
398       },
399       "source": [
400         "# Flag rows whose `ellis-load.avg_1_min` exceeds 2.45. NOTE(review): the export name says 'lessthan0198' -- confirm it is intended.\n",
401         "df4 = df_Ellis[\"ellis-load.avg_1_min\"].gt(2.45)\n",
402         "df4.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/EllisLoadAvgLabel_lessthan0198.csv')\n",
403         "df4.head(50)"
404       ],
405       "execution_count": null,
406       "outputs": [
407         {
408           "output_type": "execute_result",
409           "data": {
410             "text/plain": [
411               "0     False\n",
412               "1     False\n",
413               "2     False\n",
414               "3     False\n",
415               "4     False\n",
416               "5     False\n",
417               "6     False\n",
418               "7     False\n",
419               "8     False\n",
420               "9     False\n",
421               "10    False\n",
422               "11    False\n",
423               "12    False\n",
424               "13    False\n",
425               "14    False\n",
426               "15    False\n",
427               "16    False\n",
428               "17    False\n",
429               "18    False\n",
430               "19    False\n",
431               "20    False\n",
432               "21    False\n",
433               "22    False\n",
434               "23    False\n",
435               "24    False\n",
436               "25    False\n",
437               "26    False\n",
438               "27    False\n",
439               "28    False\n",
440               "29    False\n",
441               "30    False\n",
442               "31    False\n",
443               "32    False\n",
444               "33    False\n",
445               "34    False\n",
446               "35    False\n",
447               "36    False\n",
448               "37    False\n",
449               "38    False\n",
450               "39    False\n",
451               "40    False\n",
452               "41    False\n",
453               "42    False\n",
454               "43    False\n",
455               "44    False\n",
456               "45    False\n",
457               "46    False\n",
458               "47    False\n",
459               "48    False\n",
460               "49    False\n",
461               "Name: ellis-load.avg_1_min, dtype: bool"
462             ]
463           },
464           "metadata": {
465             "tags": []
466           },
467           "execution_count": 267
468         }
469       ]
470     },
471     {
472       "cell_type": "code",
473       "metadata": {
474         "colab": {
475           "base_uri": "https://localhost:8080/"
476         },
477         "id": "8xcPRerCz8nA",
478         "outputId": "fb66f20e-7365-40ec-857a-9dd9a8072401"
479       },
480       "source": [
481         "# Flag rows whose `ellis-cpu.wait_perc` exceeds 5; the boolean mask is exported, then previewed.\n",
482         "df3 = df_Ellis[\"ellis-cpu.wait_perc\"].gt(5)\n",
483         "df3.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/ellis-cpu>5.csv')\n",
484         "df3.head(50)"
485       ],
486       "execution_count": null,
487       "outputs": [
488         {
489           "output_type": "execute_result",
490           "data": {
491             "text/plain": [
492               "0     True\n",
493               "1     True\n",
494               "2     True\n",
495               "3     True\n",
496               "4     True\n",
497               "5     True\n",
498               "6     True\n",
499               "7     True\n",
500               "8     True\n",
501               "9     True\n",
502               "10    True\n",
503               "11    True\n",
504               "12    True\n",
505               "13    True\n",
506               "14    True\n",
507               "15    True\n",
508               "16    True\n",
509               "17    True\n",
510               "18    True\n",
511               "19    True\n",
512               "20    True\n",
513               "21    True\n",
514               "22    True\n",
515               "23    True\n",
516               "24    True\n",
517               "25    True\n",
518               "26    True\n",
519               "27    True\n",
520               "28    True\n",
521               "29    True\n",
522               "30    True\n",
523               "31    True\n",
524               "32    True\n",
525               "33    True\n",
526               "34    True\n",
527               "35    True\n",
528               "36    True\n",
529               "37    True\n",
530               "38    True\n",
531               "39    True\n",
532               "40    True\n",
533               "41    True\n",
534               "42    True\n",
535               "43    True\n",
536               "44    True\n",
537               "45    True\n",
538               "46    True\n",
539               "47    True\n",
540               "48    True\n",
541               "49    True\n",
542               "Name: ellis-cpu.wait_perc, dtype: bool"
543             ]
544           },
545           "metadata": {
546             "tags": []
547           },
548           "execution_count": 268
549         }
550       ]
551     },
552     {
553       "cell_type": "code",
554       "metadata": {
555         "id": "EED56Wiq_NjM",
556         "colab": {
557           "base_uri": "https://localhost:8080/"
558         },
559         "outputId": "20b06258-c5ba-457b-a022-cf5823217cbf"
560       },
561       "source": [
562         "# Flag rows whose `ellis-net.out_packets_sec` exceeds 1000. NOTE(review): the export is named after 'in_bytes_sec21139' -- confirm.\n",
563         "df5 = df_Ellis[\"ellis-net.out_packets_sec\"].gt(1000)\n",
564         "df5.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/ellis-net.in_bytes_sec21139.csv')\n",
565         "df5.head(50)"
566       ],
567       "execution_count": null,
568       "outputs": [
569         {
570           "output_type": "execute_result",
571           "data": {
572             "text/plain": [
573               "0     False\n",
574               "1     False\n",
575               "2     False\n",
576               "3     False\n",
577               "4     False\n",
578               "5     False\n",
579               "6     False\n",
580               "7     False\n",
581               "8     False\n",
582               "9     False\n",
583               "10    False\n",
584               "11    False\n",
585               "12    False\n",
586               "13    False\n",
587               "14    False\n",
588               "15    False\n",
589               "16    False\n",
590               "17    False\n",
591               "18    False\n",
592               "19    False\n",
593               "20    False\n",
594               "21    False\n",
595               "22    False\n",
596               "23    False\n",
597               "24    False\n",
598               "25    False\n",
599               "26    False\n",
600               "27    False\n",
601               "28    False\n",
602               "29    False\n",
603               "30    False\n",
604               "31    False\n",
605               "32    False\n",
606               "33    False\n",
607               "34    False\n",
608               "35    False\n",
609               "36    False\n",
610               "37    False\n",
611               "38    False\n",
612               "39    False\n",
613               "40    False\n",
614               "41    False\n",
615               "42    False\n",
616               "43    False\n",
617               "44    False\n",
618               "45    False\n",
619               "46    False\n",
620               "47    False\n",
621               "48    False\n",
622               "49    False\n",
623               "Name: ellis-net.out_packets_sec, dtype: bool"
624             ]
625           },
626           "metadata": {
627             "tags": []
628           },
629           "execution_count": 269
630         }
631       ]
632     },
633     {
634       "cell_type": "code",
635       "metadata": {
636         "colab": {
637           "base_uri": "https://localhost:8080/"
638         },
639         "id": "phlI40_y0mug",
640         "outputId": "7fa177b9-bf9a-4b96-db65-7402f7f6cf32"
641       },
642       "source": [
643         "# Logical OR of the load-average flag (df4) and the cpu-wait flag (df3) over every row; a [0:176999] slice is end-exclusive and dropped the last of the 177000 rows.\n",
644         "df6 = df4 | df3\n",
645         "df6.head(50)"
646       ],
647       "execution_count": null,
648       "outputs": [
649         {
650           "output_type": "execute_result",
651           "data": {
652             "text/plain": [
653               "0     True\n",
654               "1     True\n",
655               "2     True\n",
656               "3     True\n",
657               "4     True\n",
658               "5     True\n",
659               "6     True\n",
660               "7     True\n",
661               "8     True\n",
662               "9     True\n",
663               "10    True\n",
664               "11    True\n",
665               "12    True\n",
666               "13    True\n",
667               "14    True\n",
668               "15    True\n",
669               "16    True\n",
670               "17    True\n",
671               "18    True\n",
672               "19    True\n",
673               "20    True\n",
674               "21    True\n",
675               "22    True\n",
676               "23    True\n",
677               "24    True\n",
678               "25    True\n",
679               "26    True\n",
680               "27    True\n",
681               "28    True\n",
682               "29    True\n",
683               "30    True\n",
684               "31    True\n",
685               "32    True\n",
686               "33    True\n",
687               "34    True\n",
688               "35    True\n",
689               "36    True\n",
690               "37    True\n",
691               "38    True\n",
692               "39    True\n",
693               "40    True\n",
694               "41    True\n",
695               "42    True\n",
696               "43    True\n",
697               "44    True\n",
698               "45    True\n",
699               "46    True\n",
700               "47    True\n",
701               "48    True\n",
702               "49    True\n",
703               "dtype: bool"
704             ]
705           },
706           "metadata": {
707             "tags": []
708           },
709           "execution_count": 270
710         }
711       ]
712     },
713     {
714       "cell_type": "code",
715       "metadata": {
716         "colab": {
717           "base_uri": "https://localhost:8080/"
718         },
719         "id": "9xKYzZcLAZGy",
720         "outputId": "bc15e547-c791-4104-8bb2-8ed4d3288ac1"
721       },
722       "source": [
723         "df6.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/OR_TwoCondition(2).csv')\n",
724         "df6.head(50)"
725       ],
726       "execution_count": null,
727       "outputs": [
728         {
729           "output_type": "execute_result",
730           "data": {
731             "text/plain": [
732               "0     True\n",
733               "1     True\n",
734               "2     True\n",
735               "3     True\n",
736               "4     True\n",
737               "5     True\n",
738               "6     True\n",
739               "7     True\n",
740               "8     True\n",
741               "9     True\n",
742               "10    True\n",
743               "11    True\n",
744               "12    True\n",
745               "13    True\n",
746               "14    True\n",
747               "15    True\n",
748               "16    True\n",
749               "17    True\n",
750               "18    True\n",
751               "19    True\n",
752               "20    True\n",
753               "21    True\n",
754               "22    True\n",
755               "23    True\n",
756               "24    True\n",
757               "25    True\n",
758               "26    True\n",
759               "27    True\n",
760               "28    True\n",
761               "29    True\n",
762               "30    True\n",
763               "31    True\n",
764               "32    True\n",
765               "33    True\n",
766               "34    True\n",
767               "35    True\n",
768               "36    True\n",
769               "37    True\n",
770               "38    True\n",
771               "39    True\n",
772               "40    True\n",
773               "41    True\n",
774               "42    True\n",
775               "43    True\n",
776               "44    True\n",
777               "45    True\n",
778               "46    True\n",
779               "47    True\n",
780               "48    True\n",
781               "49    True\n",
782               "dtype: bool"
783             ]
784           },
785           "metadata": {
786             "tags": []
787           },
788           "execution_count": 271
789         }
790       ]
791     },
792     {
793       "cell_type": "code",
794       "metadata": {
795         "id": "wRADpDibBZo5",
796         "colab": {
797           "base_uri": "https://localhost:8080/"
798         },
799         "outputId": "dfc6dc79-3d9f-4979-8210-e62e77b1aa6e"
800       },
801       "source": [
802         "df7 = df4 | df3 | df5  # OR of all three flags on every row (df4 | df3 is what df6 holds); [0:176999] slices dropped the final row\n",
803         "df7.head(50)"
804       ],
805       "execution_count": null,
806       "outputs": [
807         {
808           "output_type": "execute_result",
809           "data": {
810             "text/plain": [
811               "0     True\n",
812               "1     True\n",
813               "2     True\n",
814               "3     True\n",
815               "4     True\n",
816               "5     True\n",
817               "6     True\n",
818               "7     True\n",
819               "8     True\n",
820               "9     True\n",
821               "10    True\n",
822               "11    True\n",
823               "12    True\n",
824               "13    True\n",
825               "14    True\n",
826               "15    True\n",
827               "16    True\n",
828               "17    True\n",
829               "18    True\n",
830               "19    True\n",
831               "20    True\n",
832               "21    True\n",
833               "22    True\n",
834               "23    True\n",
835               "24    True\n",
836               "25    True\n",
837               "26    True\n",
838               "27    True\n",
839               "28    True\n",
840               "29    True\n",
841               "30    True\n",
842               "31    True\n",
843               "32    True\n",
844               "33    True\n",
845               "34    True\n",
846               "35    True\n",
847               "36    True\n",
848               "37    True\n",
849               "38    True\n",
850               "39    True\n",
851               "40    True\n",
852               "41    True\n",
853               "42    True\n",
854               "43    True\n",
855               "44    True\n",
856               "45    True\n",
857               "46    True\n",
858               "47    True\n",
859               "48    True\n",
860               "49    True\n",
861               "dtype: bool"
862             ]
863           },
864           "metadata": {
865             "tags": []
866           },
867           "execution_count": 272
868         }
869       ]
870     },
871     {
872       "cell_type": "code",
873       "metadata": {
874         "id": "w6BrDjX4CODn",
875         "colab": {
876           "base_uri": "https://localhost:8080/"
877         },
878         "outputId": "a6c956e7-6aed-4bdd-f37f-505a994de51a"
879       },
880       "source": [
881         "df7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/FinalORLabel8.5.csv')\n",
882         "df7.head(50)"
883       ],
884       "execution_count": null,
885       "outputs": [
886         {
887           "output_type": "execute_result",
888           "data": {
889             "text/plain": [
890               "0     True\n",
891               "1     True\n",
892               "2     True\n",
893               "3     True\n",
894               "4     True\n",
895               "5     True\n",
896               "6     True\n",
897               "7     True\n",
898               "8     True\n",
899               "9     True\n",
900               "10    True\n",
901               "11    True\n",
902               "12    True\n",
903               "13    True\n",
904               "14    True\n",
905               "15    True\n",
906               "16    True\n",
907               "17    True\n",
908               "18    True\n",
909               "19    True\n",
910               "20    True\n",
911               "21    True\n",
912               "22    True\n",
913               "23    True\n",
914               "24    True\n",
915               "25    True\n",
916               "26    True\n",
917               "27    True\n",
918               "28    True\n",
919               "29    True\n",
920               "30    True\n",
921               "31    True\n",
922               "32    True\n",
923               "33    True\n",
924               "34    True\n",
925               "35    True\n",
926               "36    True\n",
927               "37    True\n",
928               "38    True\n",
929               "39    True\n",
930               "40    True\n",
931               "41    True\n",
932               "42    True\n",
933               "43    True\n",
934               "44    True\n",
935               "45    True\n",
936               "46    True\n",
937               "47    True\n",
938               "48    True\n",
939               "49    True\n",
940               "dtype: bool"
941             ]
942           },
943           "metadata": {
944             "tags": []
945           },
946           "execution_count": 273
947         }
948       ]
949     },
950     {
951       "cell_type": "code",
952       "metadata": {
953         "id": "wwv2cjFAIFHL"
954       },
955       "source": [
956         "df_Ellis.insert (7, \"Label\", df7)"
957       ],
958       "execution_count": null,
959       "outputs": []
960     },
961     {
962       "cell_type": "code",
963       "metadata": {
964         "id": "hrPqpjd96I1x"
965       },
966       "source": [
967         "#df_Ellis.insert (8, \"Label\", df7)"
968       ],
969       "execution_count": null,
970       "outputs": []
971     },
972     {
973       "cell_type": "code",
974       "metadata": {
975         "id": "_zKkQLOz6qPY"
976       },
977       "source": [
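978         "# We applied the logical OR operator to only two features (df3 and df4) and stored the result in df6, which is the final label after applying the OR condition. NOTE(review): the Label column inserted above comes from df7 -- confirm which variable holds the final label.\n",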
979         "df_Ellis\n",
980         "df_Ellis.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Ellis_FinalTwoConditionwithOR.csv')"
981       ],
982       "execution_count": null,
983       "outputs": []
984     },
985     {
986       "cell_type": "code",
987       "metadata": {
988         "id": "3rEy1vtp67M9",
989         "colab": {
990           "base_uri": "https://localhost:8080/",
991           "height": 606
992         },
993         "outputId": "4e2175cc-dccb-4aaf-a152-e2452de241b0"
994       },
995       "source": [
996         "df_Ellis.head(100)"
997       ],
998       "execution_count": null,
999       "outputs": [
1000         {
1001           "output_type": "execute_result",
1002           "data": {
1003             "text/html": [
1004               "<div>\n",
1005               "<style scoped>\n",
1006               "    .dataframe tbody tr th:only-of-type {\n",
1007               "        vertical-align: middle;\n",
1008               "    }\n",
1009               "\n",
1010               "    .dataframe tbody tr th {\n",
1011               "        vertical-align: top;\n",
1012               "    }\n",
1013               "\n",
1014               "    .dataframe thead th {\n",
1015               "        text-align: right;\n",
1016               "    }\n",
1017               "</style>\n",
1018               "<table border=\"1\" class=\"dataframe\">\n",
1019               "  <thead>\n",
1020               "    <tr style=\"text-align: right;\">\n",
1021               "      <th></th>\n",
1022               "      <th>Timestamp</th>\n",
1023               "      <th>ellis-cpu.system_perc</th>\n",
1024               "      <th>ellis-cpu.wait_perc</th>\n",
1025               "      <th>ellis-load.avg_1_min</th>\n",
1026               "      <th>ellis-mem.free_mb</th>\n",
1027               "      <th>ellis-net.in_bytes_sec</th>\n",
1028               "      <th>ellis-net.out_packets_sec</th>\n",
1029               "      <th>Label</th>\n",
1030               "    </tr>\n",
1031               "  </thead>\n",
1032               "  <tbody>\n",
1033               "    <tr>\n",
1034               "      <th>0</th>\n",
1035               "      <td>14/09/2016 0:00</td>\n",
1036               "      <td>0.5</td>\n",
1037               "      <td>12.9</td>\n",
1038               "      <td>1.73</td>\n",
1039               "      <td>3949</td>\n",
1040               "      <td>5413.200</td>\n",
1041               "      <td>62.067</td>\n",
1042               "      <td>True</td>\n",
1043               "    </tr>\n",
1044               "    <tr>\n",
1045               "      <th>1</th>\n",
1046               "      <td>14/09/2016 0:00</td>\n",
1047               "      <td>0.4</td>\n",
1048               "      <td>10.3</td>\n",
1049               "      <td>1.79</td>\n",
1050               "      <td>3950</td>\n",
1051               "      <td>5201.667</td>\n",
1052               "      <td>59.567</td>\n",
1053               "      <td>True</td>\n",
1054               "    </tr>\n",
1055               "    <tr>\n",
1056               "      <th>2</th>\n",
1057               "      <td>14/09/2016 0:01</td>\n",
1058               "      <td>0.4</td>\n",
1059               "      <td>11.8</td>\n",
1060               "      <td>1.52</td>\n",
1061               "      <td>3950</td>\n",
1062               "      <td>5370.733</td>\n",
1063               "      <td>61.200</td>\n",
1064               "      <td>True</td>\n",
1065               "    </tr>\n",
1066               "    <tr>\n",
1067               "      <th>3</th>\n",
1068               "      <td>14/09/2016 0:01</td>\n",
1069               "      <td>0.4</td>\n",
1070               "      <td>12.9</td>\n",
1071               "      <td>1.43</td>\n",
1072               "      <td>3949</td>\n",
1073               "      <td>5292.467</td>\n",
1074               "      <td>60.400</td>\n",
1075               "      <td>True</td>\n",
1076               "    </tr>\n",
1077               "    <tr>\n",
1078               "      <th>4</th>\n",
1079               "      <td>14/09/2016 0:02</td>\n",
1080               "      <td>0.5</td>\n",
1081               "      <td>12.1</td>\n",
1082               "      <td>1.44</td>\n",
1083               "      <td>3950</td>\n",
1084               "      <td>5318.167</td>\n",
1085               "      <td>61.700</td>\n",
1086               "      <td>True</td>\n",
1087               "    </tr>\n",
1088               "    <tr>\n",
1089               "      <th>...</th>\n",
1090               "      <td>...</td>\n",
1091               "      <td>...</td>\n",
1092               "      <td>...</td>\n",
1093               "      <td>...</td>\n",
1094               "      <td>...</td>\n",
1095               "      <td>...</td>\n",
1096               "      <td>...</td>\n",
1097               "      <td>...</td>\n",
1098               "    </tr>\n",
1099               "    <tr>\n",
1100               "      <th>95</th>\n",
1101               "      <td>14/09/2016 0:47</td>\n",
1102               "      <td>0.5</td>\n",
1103               "      <td>10.8</td>\n",
1104               "      <td>0.45</td>\n",
1105               "      <td>3948</td>\n",
1106               "      <td>5187.133</td>\n",
1107               "      <td>60.100</td>\n",
1108               "      <td>True</td>\n",
1109               "    </tr>\n",
1110               "    <tr>\n",
1111               "      <th>96</th>\n",
1112               "      <td>14/09/2016 0:48</td>\n",
1113               "      <td>0.5</td>\n",
1114               "      <td>10.4</td>\n",
1115               "      <td>0.42</td>\n",
1116               "      <td>3949</td>\n",
1117               "      <td>5223.100</td>\n",
1118               "      <td>60.233</td>\n",
1119               "      <td>True</td>\n",
1120               "    </tr>\n",
1121               "    <tr>\n",
1122               "      <th>97</th>\n",
1123               "      <td>14/09/2016 0:48</td>\n",
1124               "      <td>0.6</td>\n",
1125               "      <td>13.0</td>\n",
1126               "      <td>0.56</td>\n",
1127               "      <td>3947</td>\n",
1128               "      <td>5335.200</td>\n",
1129               "      <td>60.667</td>\n",
1130               "      <td>True</td>\n",
1131               "    </tr>\n",
1132               "    <tr>\n",
1133               "      <th>98</th>\n",
1134               "      <td>14/09/2016 0:49</td>\n",
1135               "      <td>0.6</td>\n",
1136               "      <td>10.1</td>\n",
1137               "      <td>0.47</td>\n",
1138               "      <td>3948</td>\n",
1139               "      <td>5185.733</td>\n",
1140               "      <td>60.367</td>\n",
1141               "      <td>True</td>\n",
1142               "    </tr>\n",
1143               "    <tr>\n",
1144               "      <th>99</th>\n",
1145               "      <td>14/09/2016 0:49</td>\n",
1146               "      <td>0.6</td>\n",
1147               "      <td>10.8</td>\n",
1148               "      <td>0.28</td>\n",
1149               "      <td>3948</td>\n",
1150               "      <td>5204.233</td>\n",
1151               "      <td>59.600</td>\n",
1152               "      <td>True</td>\n",
1153               "    </tr>\n",
1154               "  </tbody>\n",
1155               "</table>\n",
1156               "<p>100 rows × 8 columns</p>\n",
1157               "</div>"
1158             ],
1159             "text/plain": [
1160               "          Timestamp  ellis-cpu.system_perc  ...  ellis-net.out_packets_sec  Label\n",
1161               "0   14/09/2016 0:00                    0.5  ...                     62.067   True\n",
1162               "1   14/09/2016 0:00                    0.4  ...                     59.567   True\n",
1163               "2   14/09/2016 0:01                    0.4  ...                     61.200   True\n",
1164               "3   14/09/2016 0:01                    0.4  ...                     60.400   True\n",
1165               "4   14/09/2016 0:02                    0.5  ...                     61.700   True\n",
1166               "..              ...                    ...  ...                        ...    ...\n",
1167               "95  14/09/2016 0:47                    0.5  ...                     60.100   True\n",
1168               "96  14/09/2016 0:48                    0.5  ...                     60.233   True\n",
1169               "97  14/09/2016 0:48                    0.6  ...                     60.667   True\n",
1170               "98  14/09/2016 0:49                    0.6  ...                     60.367   True\n",
1171               "99  14/09/2016 0:49                    0.6  ...                     59.600   True\n",
1172               "\n",
1173               "[100 rows x 8 columns]"
1174             ]
1175           },
1176           "metadata": {
1177             "tags": []
1178           },
1179           "execution_count": 277
1180         }
1181       ]
1182     },
1183     {
1184       "cell_type": "code",
1185       "metadata": {
1186         "colab": {
1187           "base_uri": "https://localhost:8080/"
1188         },
1189         "id": "11Qu45RY0HNG",
1190         "outputId": "305c5dd5-ec61-48a8-abb6-e29bbc4b9e42"
1191       },
1192       "source": [
1193         "# Count how many rows carry each distinct value of the Label column\n",
1194         "df_Ellis['Label'].value_counts()"
1195       ],
1196       "execution_count": null,
1197       "outputs": [
1198         {
1199           "output_type": "execute_result",
1200           "data": {
1201             "text/plain": [
1202               "False    112145\n",
1203               "True      64854\n",
1204               "Name: Label, dtype: int64"
1205             ]
1206           },
1207           "metadata": {
1208             "tags": []
1209           },
1210           "execution_count": 278
1211         }
1212       ]
1213     },
1214     {
1215       "cell_type": "code",
1216       "metadata": {
1217         "id": "0sB-W_Ny4eHk"
1218       },
1219       "source": [
1220         "#final.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/New/FinalLabel.csv')"
1221       ],
1222       "execution_count": null,
1223       "outputs": []
1224     },
1225     {
1226       "cell_type": "code",
1227       "metadata": {
1228         "id": "ERsufys7wcSg"
1229       },
1230       "source": [
1231         "#df_Ellis.loc[(df_Ellis[\"ellis-cpu.wait_perc\"] > 5) & (df_Ellis[\"ellis-load.avg_1_min\"] > 2)]"
1232       ],
1233       "execution_count": null,
1234       "outputs": []
1235     },
1236     {
1237       "cell_type": "markdown",
1238       "metadata": {
1239         "id": "9le7MwnDhlnH"
1240       },
1241       "source": [
1242         "# **Creating New Features**"
1243       ]
1244     },
1245     {
1246       "cell_type": "code",
1247       "metadata": {
1248         "id": "090QXGpPlEF6"
1249       },
1250       "source": [
1251         ""
1252       ],
1253       "execution_count": null,
1254       "outputs": []
1255     }
1256   ]
1257 }