PTL: Changed the Thoth PTL details
[thoth.git] / models / failure_prediction / jnotebooks / vIMS_Visualization.ipynb
1 {
2  "cells": [
3   {
4    "cell_type": "markdown",
5    "metadata": {
6     "id": "pRQZOrAplLuo"
7    },
8    "source": [
9     "Contributors: **Rohit Singh Rathaur, Girish L.** \n",
10     "\n",
11     "Copyright [2021](2021) [*Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka*]\n",
12     "\n",
13     "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
14     "you may not use this file except in compliance with the License.\n",
15     "You may obtain a copy of the License at\n",
16     "\n",
17     "    http://www.apache.org/licenses/LICENSE-2.0\n",
18     "\n",
19     "Unless required by applicable law or agreed to in writing, software\n",
20     "distributed under the License is distributed on an \"AS IS\" BASIS,\n",
21     "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
22     "See the License for the specific language governing permissions and\n",
23     "limitations under the License."
24    ]
25   },
26   {
27    "cell_type": "code",
28    "execution_count": null,
29    "metadata": {
30     "id": "6rUjno0va6DX"
31    },
32    "outputs": [],
33    "source": [
34     "# import the necessary libraries\n",
35     "\n",
36     "import numpy as np # linear algebra\n",
37     "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
38     "\n",
39     "\n",
40     "# To plot pretty figures\n",
41     "%matplotlib inline\n",
42     "import matplotlib.pyplot as plt\n",
43     "plt.rcParams['axes.labelsize'] = 14\n",
44     "plt.rcParams['xtick.labelsize'] = 12\n",
45     "plt.rcParams['ytick.labelsize'] = 12\n",
46     "\n",
47     "\n",
48     "import seaborn as sns\n",
49     "color = sns.color_palette()\n",
50     "sns.set_style('darkgrid')\n",
51     "\n",
52     "import warnings\n",
53     "def ignore_warn(*args, **kwargs):\n",
54     "    pass\n",
55     "warnings.warn = ignore_warn #ignore annoying warning (from sklearn and seaborn)\n",
56     "\n",
57     "\n",
58     "from scipy import stats\n",
59     "from scipy.stats import norm, skew #for some statistics\n",
60     "\n",
61     "\n",
62     "pd.set_option('display.float_format', lambda x: '{:.3f}'.format(x)) #Limiting floats output to 3 decimal points\n",
63     "\n",
64     "\n",
65     "from subprocess import check_output\n",
66     "#print(check_output([\"ls\", \"../input\"]).decode(\"utf8\")) #check the files available in the directory"
67    ]
68   },
69   {
70    "cell_type": "markdown",
71    "metadata": {
72     "id": "K4CRikWVbT1d"
73    },
74    "source": [
75     "# **X.npy data /all data stored in the npy format**"
76    ]
77   },
78   {
79    "cell_type": "code",
80    "execution_count": null,
81    "metadata": {
82     "colab": {
83      "base_uri": "https://localhost:8080/"
84     },
85     "id": "geh5BNM3bhmT",
86     "outputId": "0f5af44d-de19-438b-e4de-5f703c59a687"
87    },
88    "outputs": [],
89    "source": [
90     "from google.colab import drive\n",
91     "drive.mount('/gdrive')"
92    ]
93   },
94   {
95    "cell_type": "code",
96    "execution_count": null,
97    "metadata": {
98     "id": "PanwhFGBbDV7"
99    },
100    "outputs": [],
101    "source": [
102     "# load the whole dataset and name all the features; the columns are sorted per server in the next cell\n",
103     "X = np.load('/gdrive/MyDrive/LFN Anuket/Analysis/data/X.npy', allow_pickle=True)\n",
104     "dframe = pd.DataFrame(data=X,columns=['ellis-cpu.idle_perc', 'ralf-load.avg_15_min', 'bono-net.in_errors_sec', 'homer-net.out_bytes_sec', 'ellis-io.write_req_sec', 'homer-mem.total_mb', 'homestead-load.avg_1_min', 'homer-load.avg_1_min', 'sprout-cpu.stolen_perc', 'ralf-cpu.idle_perc', 'sprout-io.read_req_sec', 'homestead-net.in_bytes_sec', 'homer-disk.space_used_perc', 'bono-net.out_packets_sec', 'homer-cpu.wait_perc', 'ellis-net.in_packets_sec', 'bono-mem.free_mb', 'ellis-io.read_req_sec', 'bono-mem.usable_mb', 'bono-net.in_packets_dropped_sec', 'homestead-mem.free_mb', 'homer-io.write_time_sec', 'sprout-io.write_time_sec', 'homestead-net.in_errors_sec', 'homestead-mem.usable_perc', 'homestead-net.in_packets_dropped_sec', 'homestead-io.write_req_sec', 'bono-net.in_bytes_sec', 'homestead-disk.space_used_perc', 'homer-net.out_packets_sec', 'bono-mem.usable_perc', 'ralf-net.out_errors_sec', 'homestead-load.avg_5_min', 'sprout-io.read_kbytes_sec', 'sprout-net.out_errors_sec', 'homestead-io.write_kbytes_sec', 'homestead-net.in_packets_sec', 'sprout-mem.usable_mb', 'homestead-cpu.idle_perc', 'ralf-io.write_time_sec', 'ralf-io.write_kbytes_sec', 'ralf-io.write_req_sec', 'ellis-net.out_bytes_sec', 'bono-io.read_kbytes_sec', 'bono-disk.space_used_perc', 'homer-net.in_packets_dropped_sec', 'ralf-mem.usable_mb', 'bono-load.avg_15_min', 'bono-io.read_time_sec', 'sprout-mem.usable_perc', 'bono-cpu.idle_perc', 'homer-mem.usable_perc', 'homestead-cpu.stolen_perc', 'ralf-io.read_req_sec', 'homer-cpu.idle_perc', 'homestead-mem.total_mb', 'ralf-load.avg_1_min', 'homer-io.read_kbytes_sec', 'homestead-io.read_req_sec', 'ellis-mem.free_mb', 'bono-io.write_time_sec', 'ellis-net.out_errors_sec', 'ellis-cpu.stolen_perc', 'ellis-mem.usable_perc', 'ralf-disk.inode_used_perc', 'sprout-load.avg_15_min', 'ellis-io.read_time_sec', 'ralf-net.out_packets_sec', 'sprout-io.write_req_sec', 'bono-cpu.stolen_perc', 'homestead-load.avg_15_min', 'bono-cpu.system_perc', 
'homestead-net.out_packets_sec', 'ellis-io.write_kbytes_sec', 'sprout-cpu.idle_perc', 'ellis-mem.total_mb', 'homer-mem.usable_mb', 'bono-load.avg_5_min', 'ellis-load.avg_5_min', 'homer-cpu.stolen_perc', 'sprout-net.out_bytes_sec', 'homestead-mem.usable_mb', 'homestead-disk.inode_used_perc', 'ralf-net.in_packets_dropped_sec', 'sprout-io.write_kbytes_sec', 'ellis-load.avg_15_min', 'homer-load.avg_5_min', 'ralf-mem.usable_perc', 'bono-net.out_bytes_sec', 'ellis-cpu.system_perc', 'homer-io.read_time_sec', 'ellis-disk.inode_used_perc', 'homestead-io.read_time_sec', 'sprout-net.in_bytes_sec', 'bono-io.write_kbytes_sec', 'homestead-io.read_kbytes_sec', 'ellis-net.in_errors_sec', 'sprout-io.read_time_sec', 'homer-disk.inode_used_perc', 'ralf-cpu.wait_perc', 'homer-load.avg_15_min', 'sprout-load.avg_5_min', 'homer-io.read_req_sec', 'ralf-mem.total_mb', 'homer-mem.free_mb', 'homer-net.in_packets_sec', 'homestead-net.out_bytes_sec', 'sprout-disk.inode_used_perc', 'ellis-mem.usable_mb', 'homer-io.write_kbytes_sec', 'homer-net.out_errors_sec', 'homer-cpu.system_perc', 'ellis-io.read_kbytes_sec', 'sprout-load.avg_1_min', 'sprout-cpu.system_perc', 'ralf-cpu.stolen_perc', 'bono-mem.total_mb', 'bono-net.out_errors_sec', 'ellis-io.write_time_sec', 'ralf-io.read_time_sec', 'sprout-cpu.wait_perc', 'ellis-cpu.wait_perc', 'ralf-disk.space_used_perc', 'ralf-net.out_bytes_sec', 'ellis-net.in_packets_dropped_sec', 'homer-net.in_bytes_sec', 'ellis-net.in_bytes_sec', 'bono-cpu.wait_perc', 'ralf-net.in_packets_sec', 'sprout-mem.total_mb', 'ralf-net.in_bytes_sec', 'bono-load.avg_1_min', 'sprout-net.in_packets_sec', 'bono-io.write_req_sec', 'ralf-load.avg_5_min', 'ralf-net.in_errors_sec', 'bono-disk.inode_used_perc', 'homestead-io.write_time_sec', 'ellis-net.out_packets_sec', 'sprout-disk.space_used_perc', 'ralf-io.read_kbytes_sec', 'homestead-cpu.system_perc', 'sprout-mem.free_mb', 'homer-net.in_errors_sec', 'homestead-net.out_errors_sec', 'homer-io.write_req_sec', 'sprout-net.in_errors_sec', 
'ellis-disk.space_used_perc', 'sprout-net.out_packets_sec', 'sprout-net.in_packets_dropped_sec', 'ralf-cpu.system_perc', 'ralf-mem.free_mb', 'bono-io.read_req_sec', 'bono-net.in_packets_sec', 'homestead-cpu.wait_perc', 'ellis-load.avg_1_min'])"
105    ]
106   },
107   {
108    "cell_type": "code",
109    "execution_count": null,
110    "metadata": {
111     "id": "nAia0RRkbcSV"
112    },
113    "outputs": [],
114    "source": [
115     "dframesorted = dframe.sort_index(axis=1, ascending=True, inplace=False, kind='quicksort')"
116    ]
117   },
118   {
119    "cell_type": "code",
120    "execution_count": null,
121    "metadata": {
122     "colab": {
123      "base_uri": "https://localhost:8080/",
124      "height": 270
125     },
126     "id": "76XHMM3cfWGW",
127     "outputId": "87965d6f-7c2a-4c39-ed9e-f372a0ade7ca"
128    },
129    "outputs": [],
130    "source": [
131     "dframesorted.head()"
132    ]
133   },
134   {
135    "cell_type": "code",
136    "execution_count": null,
137    "metadata": {
138     "id": "lKCAS0m7fe5F"
139    },
140    "outputs": [],
141    "source": [
142     "#dframesorted.describe()"
143    ]
144   },
145   {
146    "cell_type": "code",
147    "execution_count": null,
148    "metadata": {
149     "colab": {
150      "base_uri": "https://localhost:8080/"
151     },
152     "id": "vnThYldxXdaw",
153     "outputId": "696f7fee-55eb-4d38-d471-17d1dd4e2e7e"
154    },
155    "outputs": [],
156    "source": [
157     "# print the column names so that the metrics of each server can be selected later\n",
158     "print('Column names are: ',list(dframesorted.columns))"
159    ]
160   },
161   {
162    "cell_type": "markdown",
163    "metadata": {
164     "id": "8-pffUQBXojo"
165    },
166    "source": [
167     "# **X_126bis / all data with 30 fewer features, in csv format**\n",
168     "cpu.stolen_perc\n",
169     "\n",
170     "mem.total_mb\n",
171     "\n",
172     "net.in_errors_sec\n",
173     "\n",
174     "net.in_packets_dropped_sec\n",
175     "\n",
176     "net.out_errors_sec"
177    ]
178   },
179   {
180    "cell_type": "code",
181    "execution_count": null,
182    "metadata": {
183     "colab": {
184      "base_uri": "https://localhost:8080/"
185     },
186     "id": "HE4xoUdNXgLi",
187     "outputId": "16443f8c-111b-4023-d192-345e02c5a49f"
188    },
189    "outputs": [],
190    "source": [
191     "X_126bis = dframesorted.drop(['sprout-cpu.stolen_perc', 'sprout-mem.total_mb', 'sprout-net.in_errors_sec', 'sprout-net.in_packets_dropped_sec', 'sprout-net.out_errors_sec','homer-cpu.stolen_perc', 'homer-mem.total_mb', 'homer-net.in_errors_sec', 'homer-net.in_packets_dropped_sec', 'homer-net.out_errors_sec','ellis-cpu.stolen_perc', 'ellis-mem.total_mb', 'ellis-net.in_errors_sec', 'ellis-net.in_packets_dropped_sec', 'ellis-net.out_errors_sec', 'bono-cpu.stolen_perc', 'bono-mem.total_mb', 'bono-net.in_errors_sec', 'bono-net.in_packets_dropped_sec', 'bono-net.out_errors_sec', 'ralf-cpu.stolen_perc', 'ralf-mem.total_mb', 'ralf-net.in_errors_sec', 'ralf-net.in_packets_dropped_sec', 'ralf-net.out_errors_sec', 'homestead-cpu.stolen_perc', 'homestead-mem.total_mb', 'homestead-net.in_errors_sec', 'homestead-net.in_packets_dropped_sec', 'homestead-net.out_errors_sec'], axis =1)\n",
192     "\n",
193     "dframesorted.shape, X_126bis.shape"
194    ]
195   },
196   {
197    "cell_type": "code",
198    "execution_count": null,
199    "metadata": {
200     "id": "5_8LHtazXvwf"
201    },
202    "outputs": [],
203    "source": [
204     "X_126bis.to_csv('X_126bis.csv', sep=',')"
205    ]
206   },
207   {
208    "cell_type": "markdown",
209    "metadata": {
210     "id": "sknAZgiPX6_2"
211    },
212    "source": [
213     "# **df_Ellis.csv / extract the ellis server metrics**\n",
214     "1) Ellis Server with 26 metrics\n",
215     "\n",
216     "2) subselection of the Ellis metrics"
217    ]
218   },
219   {
220    "cell_type": "code",
221    "execution_count": null,
222    "metadata": {
223     "id": "S6n-hPD3X43P"
224    },
225    "outputs": [],
226    "source": [
227     "# this function selects columns from the global df and creates a new df with them\n",
228     "def select_columns(data_frame, column_names):\n",
229     "    new_frame = data_frame.loc[:, column_names]\n",
230     "    return new_frame\n",
231     "\n",
232     "selected_columns = ['ellis-cpu.idle_perc', 'ellis-cpu.stolen_perc', 'ellis-cpu.system_perc', 'ellis-cpu.wait_perc', 'ellis-disk.inode_used_perc', 'ellis-disk.space_used_perc', 'ellis-io.read_kbytes_sec', 'ellis-io.read_req_sec', 'ellis-io.read_time_sec', 'ellis-io.write_kbytes_sec', 'ellis-io.write_req_sec', 'ellis-io.write_time_sec', 'ellis-load.avg_15_min', 'ellis-load.avg_1_min', 'ellis-load.avg_5_min', 'ellis-mem.free_mb', 'ellis-mem.total_mb', 'ellis-mem.usable_mb', 'ellis-mem.usable_perc', 'ellis-net.in_bytes_sec', 'ellis-net.in_errors_sec', 'ellis-net.in_packets_dropped_sec', 'ellis-net.in_packets_sec', 'ellis-net.out_bytes_sec', 'ellis-net.out_errors_sec', 'ellis-net.out_packets_sec']\n",
233     "df_Ellis = select_columns(dframesorted, selected_columns)"
234    ]
235   },
236   {
237    "cell_type": "code",
238    "execution_count": null,
239    "metadata": {
240     "colab": {
241      "base_uri": "https://localhost:8080/",
242      "height": 241
243     },
244     "id": "VU27zosAYA0b",
245     "outputId": "32ed0bbb-0386-40c8-ba57-0787d3afda76"
246    },
247    "outputs": [],
248    "source": [
249     "df_Ellis.head()"
250    ]
251   },
252   {
253    "cell_type": "markdown",
254    "metadata": {
255     "id": "0k96D8aDYFw-"
256    },
257    "source": [
258     "# **df_Ellis_7 / focus on the main 6 metrics**"
259    ]
260   },
261   {
262    "cell_type": "code",
263    "execution_count": null,
264    "metadata": {
265     "id": "zVxKNXtfYCph"
266    },
267    "outputs": [],
268    "source": [
269     "selected_columns= ['ellis-load.avg_1_min', 'ellis-cpu.wait_perc', 'ellis-net.out_packets_sec', 'ellis-cpu.system_perc', 'ellis-net.in_bytes_sec', 'ellis-mem.free_mb']\n",
270     "df_Ellis_7 = select_columns(df_Ellis, selected_columns)"
271    ]
272   },
273   {
274    "cell_type": "code",
275    "execution_count": null,
276    "metadata": {
277     "colab": {
278      "base_uri": "https://localhost:8080/",
279      "height": 204
280     },
281     "id": "7y_uHZ5TYJiD",
282     "outputId": "5aef0247-67d8-4d19-c337-54075e096e07"
283    },
284    "outputs": [],
285    "source": [
286     "df_Ellis_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/New/df_Ellis.csv')\n",
287     "df_Ellis_7.head()"
288    ]
289   },
290   {
291    "cell_type": "code",
292    "execution_count": null,
293    "metadata": {
294     "colab": {
295      "base_uri": "https://localhost:8080/"
296     },
297     "id": "e9c4-7mZYLpq",
298     "outputId": "020a0e5f-a63a-4918-db1b-a9e1081ce38e"
299    },
300    "outputs": [],
301    "source": [
302     "df_Ellis_7.info()"
303    ]
304   },
305   {
306    "cell_type": "code",
307    "execution_count": null,
308    "metadata": {
309     "colab": {
310      "base_uri": "https://localhost:8080/",
311      "height": 204
312     },
313     "id": "iFy_Het0cqh7",
314     "outputId": "ad2ea7ad-1c70-4539-a558-bee07b0a8a19"
315    },
316    "outputs": [],
317    "source": [
318     "timestamp  = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/timestamp.csv\")\n",
319     "timestamp.head()"
320    ]
321   },
322   {
323    "cell_type": "code",
324    "execution_count": null,
325    "metadata": {
326     "id": "q2-Wt45vdTzt"
327    },
328    "outputs": [],
329    "source": [
330     "df1 = timestamp[\"Timestamp\"]\n",
331     "df1\n",
332     "df1.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/TimestampNew.csv')"
333    ]
334   },
335   {
336    "cell_type": "code",
337    "execution_count": null,
338    "metadata": {
339     "colab": {
340      "base_uri": "https://localhost:8080/"
341     },
342     "id": "dcpx8F6ReZzb",
343     "outputId": "7009dff8-ee1a-43ee-b990-1bfba586cce9"
344    },
345    "outputs": [],
346    "source": [
347     "df1.head()"
348    ]
349   },
350   {
351    "cell_type": "code",
352    "execution_count": null,
353    "metadata": {
354     "id": "mybijUDWfdcH"
355    },
356    "outputs": [],
357    "source": [
358     "#df_Ellis_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/New/df_EllisTime.csv') "
359    ]
360   },
361   {
362    "cell_type": "code",
363    "execution_count": null,
364    "metadata": {
365     "colab": {
366      "base_uri": "https://localhost:8080/",
367      "height": 419
368     },
369     "id": "2TddekZAfiad",
370     "outputId": "be25c51a-d1c0-4da8-9838-72fb59d85369"
371    },
372    "outputs": [],
373    "source": [
374     "df_Ellis_7"
375    ]
376   },
377   {
378    "cell_type": "code",
379    "execution_count": null,
380    "metadata": {
381     "id": "7gstjLldYT_r"
382    },
383    "outputs": [],
384    "source": [
385     "# df_Ellis_7.describe()"
386    ]
387   },
388   {
389    "cell_type": "code",
390    "execution_count": null,
391    "metadata": {
392     "id": "bFMkMFqhs5pm"
393    },
394    "outputs": [],
395    "source": [
396     "# cast every column to the builtin float (np.float was only an alias of it, deprecated in NumPy 1.20 and removed in 1.24). TODO: investigate why this cast is needed.\n",
397     "df_Ellis_7 = df_Ellis_7.astype(float)"
398    ]
399   },
400   {
401    "cell_type": "code",
402    "execution_count": null,
403    "metadata": {
404     "colab": {
405      "base_uri": "https://localhost:8080/",
406      "height": 882
407     },
408     "id": "mWxDJNMwtGYs",
409     "outputId": "73a4a63a-4bd6-4ef0-b140-4ef299152fa2"
410    },
411    "outputs": [],
412    "source": [
413     "# we show here the hist\n",
414     "df_Ellis_7.hist(bins=100,figsize=(20,15))\n",
415     "#save_fig(\"attribute_histogram_plots\")\n",
416     "plt.show()"
417    ]
418   },
419   {
420    "cell_type": "code",
421    "execution_count": null,
422    "metadata": {
423     "id": "dedoBLq_tIG_"
424    },
425    "outputs": [],
426    "source": [
427     "df_Ellis_7.to_csv('df_Ellis_7.csv', sep=';')"
428    ]
429   },
430   {
431    "cell_type": "code",
432    "execution_count": null,
433    "metadata": {
434     "colab": {
435      "base_uri": "https://localhost:8080/",
436      "height": 1000
437     },
438     "id": "sObmuXeWtLL0",
439     "outputId": "e2bbea48-8fb3-4671-a4dc-5f9f2e464b59"
440    },
441    "outputs": [],
442    "source": [
443     "# we show here the boxplot\n",
444     "plt.figure(figsize=(20,20))\n",
445     "#df_Ellis_7.boxplot(figsize=(20,20))\n",
446     "ax = sns.boxplot(x=\"variable\", y=\"value\", data=pd.melt(df_Ellis_7))"
447    ]
448   },
449   {
450    "cell_type": "code",
451    "execution_count": null,
452    "metadata": {
453     "id": "wxNTLGMHtOF_"
454    },
455    "outputs": [],
456    "source": [
457     "# the gray related metrics will be dropped when using the df_Ellis\n",
458     "\n",
459     "#del df_Ellis['ellis-cpu.stolen_perc']\n",
460     "#del df_Ellis['ellis-mem.total_mb']\n",
461     "#del df_Ellis['ellis-net.in_errors_sec']\n",
462     "#del df_Ellis['ellis-net.in_packets_dropped_sec']\n",
463     "#del df_Ellis['ellis-net.out_errors_sec']"
464    ]
465   },
466   {
467    "cell_type": "code",
468    "execution_count": null,
469    "metadata": {
470     "colab": {
471      "base_uri": "https://localhost:8080/",
472      "height": 1000
473     },
474     "id": "pcmqBw0gtUT5",
475     "outputId": "b0da529e-8895-483a-d7d1-e450fc6762e0"
476    },
477    "outputs": [],
478    "source": [
479     "# we compute the correlation matrix\n",
480     "correaltionMatrice = df_Ellis_7.corr()\n",
481     "f, ax = plt.subplots(figsize=(30, 20))\n",
482     "sns.heatmap(correaltionMatrice, cbar=True, vmin=0, vmax=1, square=True, annot=True);\n",
483     "plt.show()"
484    ]
485   },
486   {
487    "cell_type": "code",
488    "execution_count": null,
489    "metadata": {
490     "colab": {
491      "base_uri": "https://localhost:8080/",
492      "height": 411
493     },
494     "id": "7yZNvlQ2tWlu",
495     "outputId": "dc56cd65-e5e5-4179-aac9-6c6aac43c0eb"
496    },
497    "outputs": [],
498    "source": [
499     "mask = np.zeros_like(correaltionMatrice)\n",
500     "mask[np.triu_indices_from(mask)] = True\n",
501     "with sns.axes_style(\"white\"):\n",
502     "     ax = sns.heatmap(correaltionMatrice, mask=mask, vmin=0,vmax=1, square=True)\n",
503     "plt.show()"
504    ]
505   },
506   {
507    "cell_type": "code",
508    "execution_count": null,
509    "metadata": {
510     "colab": {
511      "base_uri": "https://localhost:8080/",
512      "height": 204
513     },
514     "id": "Whxt9FahtZ6i",
515     "outputId": "f6dd23c3-82b5-4dd1-9f18-6bd373ff9322"
516    },
517    "outputs": [],
518    "source": [
519     "df_Ellis_7.shape\n",
520     "df_Ellis_7.head()"
521    ]
522   },
523   {
524    "cell_type": "code",
525    "execution_count": null,
526    "metadata": {
527     "colab": {
528      "base_uri": "https://localhost:8080/",
529      "height": 1000
530     },
531     "id": "Tmzw5MYctb3h",
532     "outputId": "65c21482-29fa-42e3-eb8f-50414bb9c656"
533    },
534    "outputs": [],
535    "source": [
536     "# we show here the scatter_matrix\n",
537     "from pandas.plotting import scatter_matrix\n",
538     "scatter_matrix(df_Ellis_7, alpha=0.2, figsize=(30,30))"
539    ]
540   },
541   {
542    "cell_type": "code",
543    "execution_count": null,
544    "metadata": {
545     "colab": {
546      "base_uri": "https://localhost:8080/",
547      "height": 1000
548     },
549     "id": "kcMuENePteVx",
550     "outputId": "7ed8fd61-a02c-4a01-f661-69938c0028ad"
551    },
552    "outputs": [],
553    "source": [
554     "# we show here the scatter_matrix (kde)\n",
555     "\n",
556     "scatter_matrix(df_Ellis_7, alpha=0.2, figsize=(30, 30), diagonal='kde')"
557    ]
558   },
559   {
560    "cell_type": "code",
561    "execution_count": null,
562    "metadata": {
563     "colab": {
564      "base_uri": "https://localhost:8080/",
565      "height": 1000
566     },
567     "id": "thrxP0LQth88",
568     "outputId": "d15b9169-79b7-47a0-e19f-b8477dd2b287"
569    },
570    "outputs": [],
571    "source": [
572     "# pairwise scatter plots of the selected Ellis metrics (seaborn >= 0.9 names the facet-size argument 'height'; the old 'size' keyword was removed)\n",
573     "sns.pairplot(df_Ellis_7, height=2.5)\n",
574     "plt.show();"
575    ]
576   },
577   {
578    "cell_type": "markdown",
579    "metadata": {
580     "id": "HNMDfsExILYz"
581    },
582    "source": [
583     "# **Bono**"
584    ]
585   },
586   {
587    "cell_type": "code",
588    "execution_count": null,
589    "metadata": {
590     "id": "mxaAX85otljN"
591    },
592    "outputs": [],
593    "source": [
594     "# this function selects columns from the global df and creates a new df with them\n",
595     "def select_columns(data_frame, column_names):\n",
596     "    new_frame = data_frame.loc[:, column_names]\n",
597     "    return new_frame\n",
598     "\n",
599     "selected_columns = ['bono-cpu.idle_perc', 'bono-cpu.stolen_perc', 'bono-cpu.system_perc', 'bono-cpu.wait_perc', 'bono-disk.inode_used_perc', 'bono-disk.space_used_perc', 'bono-io.read_kbytes_sec', 'bono-io.read_req_sec', 'bono-io.read_time_sec', 'bono-io.write_kbytes_sec', 'bono-io.write_req_sec', 'bono-io.write_time_sec', 'bono-load.avg_15_min', 'bono-load.avg_1_min', 'bono-load.avg_5_min', 'bono-mem.free_mb', 'bono-mem.total_mb', 'bono-mem.usable_mb', 'bono-mem.usable_perc', 'bono-net.in_bytes_sec', 'bono-net.in_errors_sec', 'bono-net.in_packets_dropped_sec', 'bono-net.in_packets_sec', 'bono-net.out_bytes_sec', 'bono-net.out_errors_sec', 'bono-net.out_packets_sec']\n",
600     "df_Bono = select_columns(dframesorted, selected_columns)"
601    ]
602   },
603   {
604    "cell_type": "code",
605    "execution_count": null,
606    "metadata": {
607     "colab": {
608      "base_uri": "https://localhost:8080/",
609      "height": 241
610     },
611     "id": "EKi0wnvUIlZb",
612     "outputId": "4e7d2685-b09c-43ae-9217-d7210a253950"
613    },
614    "outputs": [],
615    "source": [
616     "df_Bono.head()"
617    ]
618   },
619   {
620    "cell_type": "code",
621    "execution_count": null,
622    "metadata": {
623     "id": "gGCA3rWuJdQx"
624    },
625    "outputs": [],
626    "source": [
627     "selected_columns= ['bono-load.avg_1_min', 'bono-cpu.wait_perc', 'bono-net.out_packets_sec', 'bono-cpu.system_perc', 'bono-net.in_bytes_sec', 'bono-mem.free_mb']\n",
628     "df_Bono_7 = select_columns(df_Bono, selected_columns)"
629    ]
630   },
631   {
632    "cell_type": "code",
633    "execution_count": null,
634    "metadata": {
635     "colab": {
636      "base_uri": "https://localhost:8080/",
637      "height": 204
638     },
639     "id": "9ZyhiI6AJo1Y",
640     "outputId": "9308469b-ba98-4152-dafa-308d3b6f7b04"
641    },
642    "outputs": [],
643    "source": [
644     "df_Bono_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/bono/df_Bono.csv')\n",
645     "df_Bono_7.head()"
646    ]
647   },
648   {
649    "cell_type": "code",
650    "execution_count": null,
651    "metadata": {
652     "colab": {
653      "base_uri": "https://localhost:8080/"
654     },
655     "id": "4ZUXUW35J2fr",
656     "outputId": "fc474730-e953-4f09-8633-ece4e493d9de"
657    },
658    "outputs": [],
659    "source": [
660     "df_Bono_7.info()"
661    ]
662   },
663   {
664    "cell_type": "markdown",
665    "metadata": {
666     "id": "QeQ3_oHsJ5BN"
667    },
668    "source": [
669     "# **Sprout**"
670    ]
671   },
672   {
673    "cell_type": "code",
674    "execution_count": null,
675    "metadata": {
676     "id": "gAQmKdKfJ3st"
677    },
678    "outputs": [],
679    "source": [
680     "# this function selects columns from the global df and creates a new df with them\n",
681     "def select_columns(data_frame, column_names):\n",
682     "    new_frame = data_frame.loc[:, column_names]\n",
683     "    return new_frame\n",
684     "\n",
685     "selected_columns = ['sprout-cpu.idle_perc', 'sprout-cpu.stolen_perc', 'sprout-cpu.system_perc', 'sprout-cpu.wait_perc', 'sprout-disk.inode_used_perc', 'sprout-disk.space_used_perc', 'sprout-io.read_kbytes_sec', 'sprout-io.read_req_sec', 'sprout-io.read_time_sec', 'sprout-io.write_kbytes_sec', 'sprout-io.write_req_sec', 'sprout-io.write_time_sec', 'sprout-load.avg_15_min', 'sprout-load.avg_1_min', 'sprout-load.avg_5_min', 'sprout-mem.free_mb', 'sprout-mem.total_mb', 'sprout-mem.usable_mb', 'sprout-mem.usable_perc', 'sprout-net.in_bytes_sec', 'sprout-net.in_errors_sec', 'sprout-net.in_packets_dropped_sec', 'sprout-net.in_packets_sec', 'sprout-net.out_bytes_sec', 'sprout-net.out_errors_sec', 'sprout-net.out_packets_sec']\n",
686     "df_Sprout = select_columns(dframesorted, selected_columns)"
687    ]
688   },
689   {
690    "cell_type": "code",
691    "execution_count": null,
692    "metadata": {
693     "colab": {
694      "base_uri": "https://localhost:8080/",
695      "height": 241
696     },
697     "id": "wjdR0R7YMOyT",
698     "outputId": "4600cd66-0456-4da2-8723-17b5079f7f6b"
699    },
700    "outputs": [],
701    "source": [
702     "df_Sprout.head()"
703    ]
704   },
705   {
706    "cell_type": "code",
707    "execution_count": null,
708    "metadata": {
709     "id": "6Tfi58LJMUNu"
710    },
711    "outputs": [],
712    "source": [
713     "selected_columns= ['sprout-load.avg_1_min', 'sprout-cpu.wait_perc', 'sprout-net.out_packets_sec', 'sprout-cpu.system_perc', 'sprout-net.in_bytes_sec', 'sprout-mem.free_mb']\n",
714     "df_Sprout_7 = select_columns(df_Sprout, selected_columns)"
715    ]
716   },
717   {
718    "cell_type": "code",
719    "execution_count": null,
720    "metadata": {
721     "colab": {
722      "base_uri": "https://localhost:8080/",
723      "height": 204
724     },
725     "id": "fP5NM3VjM0Uw",
726     "outputId": "163390b3-9ce0-406a-d9c9-88687d696c66"
727    },
728    "outputs": [],
729    "source": [
730     "df_Sprout_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/sprout/df_Sprout.csv')\n",
731     "df_Sprout_7.head()"
732    ]
733   },
734   {
735    "cell_type": "code",
736    "execution_count": null,
737    "metadata": {
738     "colab": {
739      "base_uri": "https://localhost:8080/"
740     },
741     "id": "Ybl5WrSYM9Q0",
742     "outputId": "50f23729-a029-440f-dc6f-828308bb342c"
743    },
744    "outputs": [],
745    "source": [
746     "df_Sprout_7.info()"
747    ]
748   },
749   {
750    "cell_type": "markdown",
751    "metadata": {
752     "id": "AvKCUZL5NKKz"
753    },
754    "source": [
755     "# **Homestead**"
756    ]
757   },
758   {
759    "cell_type": "code",
760    "execution_count": null,
761    "metadata": {
762     "id": "EaXOAy7BNF8s"
763    },
764    "outputs": [],
765    "source": [
766     "# this function selects columns from the global df and creates a new df with them\n",
767     "def select_columns(data_frame, column_names):\n",
768     "    new_frame = data_frame.loc[:, column_names]\n",
769     "    return new_frame\n",
770     "\n",
771     "selected_columns = ['homestead-cpu.idle_perc', 'homestead-cpu.stolen_perc', 'homestead-cpu.system_perc', 'homestead-cpu.wait_perc', 'homestead-disk.inode_used_perc', 'homestead-disk.space_used_perc', 'homestead-io.read_kbytes_sec', 'homestead-io.read_req_sec', 'homestead-io.read_time_sec', 'homestead-io.write_kbytes_sec', 'homestead-io.write_req_sec', 'homestead-io.write_time_sec', 'homestead-load.avg_15_min', 'homestead-load.avg_1_min', 'homestead-load.avg_5_min', 'homestead-mem.free_mb', 'homestead-mem.total_mb', 'homestead-mem.usable_mb', 'homestead-mem.usable_perc', 'homestead-net.in_bytes_sec', 'homestead-net.in_errors_sec', 'homestead-net.in_packets_dropped_sec', 'homestead-net.in_packets_sec', 'homestead-net.out_bytes_sec', 'homestead-net.out_errors_sec', 'homestead-net.out_packets_sec']\n",
772     "df_Homestead = select_columns(dframesorted, selected_columns)"
773    ]
774   },
775   {
776    "cell_type": "code",
777    "execution_count": null,
778    "metadata": {
779     "colab": {
780      "base_uri": "https://localhost:8080/",
781      "height": 241
782     },
783     "id": "Qc5vd1CVNas2",
784     "outputId": "8b6ab0c1-d15d-4545-e69b-dd39facc5915"
785    },
786    "outputs": [],
787    "source": [
788     "df_Homestead.head()"
789    ]
790   },
791   {
792    "cell_type": "code",
793    "execution_count": null,
794    "metadata": {
795     "id": "U5e-23VDNgjS"
796    },
797    "outputs": [],
798    "source": [
799     "selected_columns= ['homestead-load.avg_1_min', 'homestead-cpu.wait_perc', 'homestead-net.out_packets_sec', 'homestead-cpu.system_perc', 'homestead-net.in_bytes_sec', 'homestead-mem.free_mb']\n",
800     "df_Homestead_7 = select_columns(df_Homestead, selected_columns)"
801    ]
802   },
803   {
804    "cell_type": "code",
805    "execution_count": null,
806    "metadata": {
807     "colab": {
808      "base_uri": "https://localhost:8080/",
809      "height": 221
810     },
811     "id": "03Y_KhJVNuOC",
812     "outputId": "ab3eef43-6a18-4c61-926a-43afac19d7a5"
813    },
814    "outputs": [],
815    "source": [
816     "df_Homestead_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/homestead/df_Homestead.csv')\n",
817     "df_Homestead_7.head()"
818    ]
819   },
820   {
821    "cell_type": "code",
822    "execution_count": null,
823    "metadata": {
824     "colab": {
825      "base_uri": "https://localhost:8080/"
826     },
827     "id": "CVdMWcneN7ix",
828     "outputId": "f3e00de9-f60d-44d0-ae5b-9c4eda033687"
829    },
830    "outputs": [],
831    "source": [
832     "df_Homestead_7.info()"
833    ]
834   },
835   {
836    "cell_type": "markdown",
837    "metadata": {
838     "id": "_kw5-s7hOFN3"
839    },
840    "source": [
841     "# **Ralf**"
842    ]
843   },
844   {
845    "cell_type": "code",
846    "execution_count": null,
847    "metadata": {
848     "id": "v6xYItZWOANR"
849    },
850    "outputs": [],
851    "source": [
852     "# this function selects columns from the global df and creates a new df with them\n",
853     "def select_columns(data_frame, column_names):\n",
854     "    new_frame = data_frame.loc[:, column_names]\n",
855     "    return new_frame\n",
856     "\n",
857     "selected_columns = ['ralf-cpu.idle_perc', 'ralf-cpu.stolen_perc', 'ralf-cpu.system_perc', 'ralf-cpu.wait_perc', 'ralf-disk.inode_used_perc', 'ralf-disk.space_used_perc', 'ralf-io.read_kbytes_sec', 'ralf-io.read_req_sec', 'ralf-io.read_time_sec', 'ralf-io.write_kbytes_sec', 'ralf-io.write_req_sec', 'ralf-io.write_time_sec', 'ralf-load.avg_15_min', 'ralf-load.avg_1_min', 'ralf-load.avg_5_min', 'ralf-mem.free_mb', 'ralf-mem.total_mb', 'ralf-mem.usable_mb', 'ralf-mem.usable_perc', 'ralf-net.in_bytes_sec', 'ralf-net.in_errors_sec', 'ralf-net.in_packets_dropped_sec', 'ralf-net.in_packets_sec', 'ralf-net.out_bytes_sec', 'ralf-net.out_errors_sec', 'ralf-net.out_packets_sec']\n",
858     "df_Ralf = select_columns(dframesorted, selected_columns)"
859    ]
860   },
861   {
862    "cell_type": "code",
863    "execution_count": null,
864    "metadata": {
865     "colab": {
866      "base_uri": "https://localhost:8080/",
867      "height": 241
868     },
869     "id": "XlYi3QBlORiI",
870     "outputId": "6a4740f0-b2c8-4e48-9067-c17c7a267b0d"
871    },
872    "outputs": [],
873    "source": [
874     "df_Ralf.head()"
875    ]
876   },
877   {
878    "cell_type": "code",
879    "execution_count": null,
880    "metadata": {
881     "id": "6l88CRqDOVs-"
882    },
883    "outputs": [],
884    "source": [
885     "selected_columns= ['ralf-load.avg_1_min', 'ralf-cpu.wait_perc', 'ralf-net.out_packets_sec', 'ralf-cpu.system_perc', 'ralf-net.in_bytes_sec', 'ralf-mem.free_mb']\n",
886     "df_Ralf_7 = select_columns(df_Ralf, selected_columns)"
887    ]
888   },
889   {
890    "cell_type": "code",
891    "execution_count": null,
892    "metadata": {
893     "colab": {
894      "base_uri": "https://localhost:8080/",
895      "height": 204
896     },
897     "id": "u3v26LoiOi5L",
898     "outputId": "441d54ec-a644-446e-dc5b-e7ad5ec58456"
899    },
900    "outputs": [],
901    "source": [
902     "df_Ralf_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/ralf/df_Ralf.csv')\n",
903     "df_Ralf_7.head()"
904    ]
905   },
906   {
907    "cell_type": "code",
908    "execution_count": null,
909    "metadata": {
910     "colab": {
911      "base_uri": "https://localhost:8080/"
912     },
913     "id": "mR7Fg1w8OtnU",
914     "outputId": "141c6b09-3c69-45cc-e723-728e1f0e8e68"
915    },
916    "outputs": [],
917    "source": [
918     "df_Ralf_7.info()"
919    ]
920   },
921   {
922    "cell_type": "markdown",
923    "metadata": {
924     "id": "2-udEbTHO-HS"
925    },
926    "source": [
927     "# **Homer**"
928    ]
929   },
930   {
931    "cell_type": "code",
932    "execution_count": null,
933    "metadata": {
934     "id": "vVpG5lJiOyYw"
935    },
936    "outputs": [],
937    "source": [
938     "# select_columns() is already defined in the Ralf section above; it is reused here\n",
939     "# instead of being redefined, so the notebook keeps a single definition of the helper\n",
940     "# (a second identical def would only shadow the first one).\n",
941     "# Run the notebook top to bottom (Restart & Run All) so the helper is in scope.\n",
942     "\n",
943     "selected_columns = ['homer-cpu.idle_perc', 'homer-cpu.stolen_perc', 'homer-cpu.system_perc', 'homer-cpu.wait_perc', 'homer-disk.inode_used_perc', 'homer-disk.space_used_perc', 'homer-io.read_kbytes_sec', 'homer-io.read_req_sec', 'homer-io.read_time_sec', 'homer-io.write_kbytes_sec', 'homer-io.write_req_sec', 'homer-io.write_time_sec', 'homer-load.avg_15_min', 'homer-load.avg_1_min', 'homer-load.avg_5_min', 'homer-mem.free_mb', 'homer-mem.total_mb', 'homer-mem.usable_mb', 'homer-mem.usable_perc', 'homer-net.in_bytes_sec', 'homer-net.in_errors_sec', 'homer-net.in_packets_dropped_sec', 'homer-net.in_packets_sec', 'homer-net.out_bytes_sec', 'homer-net.out_errors_sec', 'homer-net.out_packets_sec']\n",
944     "df_Homer = select_columns(dframesorted, selected_columns)"
945    ]
946   },
947   {
948    "cell_type": "code",
949    "execution_count": null,
950    "metadata": {
951     "colab": {
952      "base_uri": "https://localhost:8080/",
953      "height": 241
954     },
955     "id": "cwMKYQWUPKpl",
956     "outputId": "ca108468-0c41-4f44-aef0-8c63239c9fd5"
957    },
958    "outputs": [],
959    "source": [
960     "df_Homer.head()"
961    ]
962   },
963   {
964    "cell_type": "code",
965    "execution_count": null,
966    "metadata": {
967     "id": "rsUb47imPOyE"
968    },
969    "outputs": [],
970    "source": [
971     "selected_columns= ['homer-load.avg_1_min', 'homer-cpu.wait_perc', 'homer-net.out_packets_sec', 'homer-cpu.system_perc', 'homer-net.in_bytes_sec', 'homer-mem.free_mb']\n",
972     "df_Homer_7 = select_columns(df_Homer, selected_columns)"
973    ]
974   },
975   {
976    "cell_type": "code",
977    "execution_count": null,
978    "metadata": {
979     "colab": {
980      "base_uri": "https://localhost:8080/",
981      "height": 204
982     },
983     "id": "VZw7WV7tPd9i",
984     "outputId": "39dc88dd-eb89-45b0-a0d3-f261aab6bbd1"
985    },
986    "outputs": [],
987    "source": [
988     "df_Homer_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/homer/df_Homer.csv')\n",
989     "df_Homer_7.head()"
990    ]
991   },
992   {
993    "cell_type": "code",
994    "execution_count": null,
995    "metadata": {
996     "colab": {
997      "base_uri": "https://localhost:8080/"
998     },
999     "id": "E1Cqq8V3PnEv",
1000     "outputId": "9a80be17-5bb5-4f8d-a61c-d3206e730309"
1001    },
1002    "outputs": [],
1003    "source": [
1004     "df_Homer_7.info()"
1005    ]
1006   }
1007  ],
1008  "metadata": {
1009   "colab": {
1010    "name": "vIMS_Visualization.ipynb",
1011    "provenance": []
1012   },
1013   "kernelspec": {
1014    "display_name": "Python 3 (ipykernel)",
1015    "language": "python",
1016    "name": "python3"
1017   },
1018   "language_info": {
1019    "codemirror_mode": {
1020     "name": "ipython",
1021     "version": 3
1022    },
1023    "file_extension": ".py",
1024    "mimetype": "text/x-python",
1025    "name": "python",
1026    "nbconvert_exporter": "python",
1027    "pygments_lexer": "ipython3",
1028    "version": "3.9.7"
1029   }
1030  },
1031  "nbformat": 4,
1032  "nbformat_minor": 1
1033 }