4 "cell_type": "markdown",
9 "Contributors: **Rohit Singh Rathaur, Girish L.** \n",
11 "Copyright [2021](2021) [*Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka*]\n",
13 "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
14 "you may not use this file except in compliance with the License.\n",
15 "You may obtain a copy of the License at\n",
17 " http://www.apache.org/licenses/LICENSE-2.0\n",
19 "Unless required by applicable law or agreed to in writing, software\n",
20 "distributed under the License is distributed on an \"AS IS\" BASIS,\n",
21 "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
22 "See the License for the specific language governing permissions and\n",
23 "limitations under the License."
28 "execution_count": null,
34 "# import the necessary libraries\n",
36 "import numpy as np # linear algebra\n",
37 "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
40 "# To plot pretty figures\n",
41 "%matplotlib inline\n",
42 "import matplotlib.pyplot as plt\n",
43 "plt.rcParams['axes.labelsize'] = 14\n",
44 "plt.rcParams['xtick.labelsize'] = 12\n",
45 "plt.rcParams['ytick.labelsize'] = 12\n",
48 "import seaborn as sns\n",
49 "color = sns.color_palette()\n",
50 "sns.set_style('darkgrid')\n",
53 "def ignore_warn(*args, **kwargs):\n",
55 "warnings.warn = ignore_warn #ignore annoying warning (from sklearn and seaborn)\n",
58 "from scipy import stats\n",
59 "from scipy.stats import norm, skew #for some statistics\n",
62 "pd.set_option('display.float_format', lambda x: '{:.3f}'.format(x)) #Limiting floats output to 3 decimal points\n",
65 "from subprocess import check_output\n",
66 "#print(check_output([\"ls\", \"../input\"]).decode(\"utf8\")) #check the files available in the directory"
70 "cell_type": "markdown",
75 "# **X.npy data / all data stored in the npy format**"
80 "execution_count": null,
83 "base_uri": "https://localhost:8080/"
86 "outputId": "0f5af44d-de19-438b-e4de-5f703c59a687"
90 "from google.colab import drive\n",
91 "drive.mount('/gdrive')"
96 "execution_count": null,
102 "# Load the whole dataset and name all of its feature columns; the columns are sorted per server in the next cell\n",
103 "X = np.load('/gdrive/MyDrive/LFN Anuket/Analysis/data/X.npy', allow_pickle=True)\n",
104 "dframe = pd.DataFrame(data=X,columns=['ellis-cpu.idle_perc', 'ralf-load.avg_15_min', 'bono-net.in_errors_sec', 'homer-net.out_bytes_sec', 'ellis-io.write_req_sec', 'homer-mem.total_mb', 'homestead-load.avg_1_min', 'homer-load.avg_1_min', 'sprout-cpu.stolen_perc', 'ralf-cpu.idle_perc', 'sprout-io.read_req_sec', 'homestead-net.in_bytes_sec', 'homer-disk.space_used_perc', 'bono-net.out_packets_sec', 'homer-cpu.wait_perc', 'ellis-net.in_packets_sec', 'bono-mem.free_mb', 'ellis-io.read_req_sec', 'bono-mem.usable_mb', 'bono-net.in_packets_dropped_sec', 'homestead-mem.free_mb', 'homer-io.write_time_sec', 'sprout-io.write_time_sec', 'homestead-net.in_errors_sec', 'homestead-mem.usable_perc', 'homestead-net.in_packets_dropped_sec', 'homestead-io.write_req_sec', 'bono-net.in_bytes_sec', 'homestead-disk.space_used_perc', 'homer-net.out_packets_sec', 'bono-mem.usable_perc', 'ralf-net.out_errors_sec', 'homestead-load.avg_5_min', 'sprout-io.read_kbytes_sec', 'sprout-net.out_errors_sec', 'homestead-io.write_kbytes_sec', 'homestead-net.in_packets_sec', 'sprout-mem.usable_mb', 'homestead-cpu.idle_perc', 'ralf-io.write_time_sec', 'ralf-io.write_kbytes_sec', 'ralf-io.write_req_sec', 'ellis-net.out_bytes_sec', 'bono-io.read_kbytes_sec', 'bono-disk.space_used_perc', 'homer-net.in_packets_dropped_sec', 'ralf-mem.usable_mb', 'bono-load.avg_15_min', 'bono-io.read_time_sec', 'sprout-mem.usable_perc', 'bono-cpu.idle_perc', 'homer-mem.usable_perc', 'homestead-cpu.stolen_perc', 'ralf-io.read_req_sec', 'homer-cpu.idle_perc', 'homestead-mem.total_mb', 'ralf-load.avg_1_min', 'homer-io.read_kbytes_sec', 'homestead-io.read_req_sec', 'ellis-mem.free_mb', 'bono-io.write_time_sec', 'ellis-net.out_errors_sec', 'ellis-cpu.stolen_perc', 'ellis-mem.usable_perc', 'ralf-disk.inode_used_perc', 'sprout-load.avg_15_min', 'ellis-io.read_time_sec', 'ralf-net.out_packets_sec', 'sprout-io.write_req_sec', 'bono-cpu.stolen_perc', 'homestead-load.avg_15_min', 'bono-cpu.system_perc', 
'homestead-net.out_packets_sec', 'ellis-io.write_kbytes_sec', 'sprout-cpu.idle_perc', 'ellis-mem.total_mb', 'homer-mem.usable_mb', 'bono-load.avg_5_min', 'ellis-load.avg_5_min', 'homer-cpu.stolen_perc', 'sprout-net.out_bytes_sec', 'homestead-mem.usable_mb', 'homestead-disk.inode_used_perc', 'ralf-net.in_packets_dropped_sec', 'sprout-io.write_kbytes_sec', 'ellis-load.avg_15_min', 'homer-load.avg_5_min', 'ralf-mem.usable_perc', 'bono-net.out_bytes_sec', 'ellis-cpu.system_perc', 'homer-io.read_time_sec', 'ellis-disk.inode_used_perc', 'homestead-io.read_time_sec', 'sprout-net.in_bytes_sec', 'bono-io.write_kbytes_sec', 'homestead-io.read_kbytes_sec', 'ellis-net.in_errors_sec', 'sprout-io.read_time_sec', 'homer-disk.inode_used_perc', 'ralf-cpu.wait_perc', 'homer-load.avg_15_min', 'sprout-load.avg_5_min', 'homer-io.read_req_sec', 'ralf-mem.total_mb', 'homer-mem.free_mb', 'homer-net.in_packets_sec', 'homestead-net.out_bytes_sec', 'sprout-disk.inode_used_perc', 'ellis-mem.usable_mb', 'homer-io.write_kbytes_sec', 'homer-net.out_errors_sec', 'homer-cpu.system_perc', 'ellis-io.read_kbytes_sec', 'sprout-load.avg_1_min', 'sprout-cpu.system_perc', 'ralf-cpu.stolen_perc', 'bono-mem.total_mb', 'bono-net.out_errors_sec', 'ellis-io.write_time_sec', 'ralf-io.read_time_sec', 'sprout-cpu.wait_perc', 'ellis-cpu.wait_perc', 'ralf-disk.space_used_perc', 'ralf-net.out_bytes_sec', 'ellis-net.in_packets_dropped_sec', 'homer-net.in_bytes_sec', 'ellis-net.in_bytes_sec', 'bono-cpu.wait_perc', 'ralf-net.in_packets_sec', 'sprout-mem.total_mb', 'ralf-net.in_bytes_sec', 'bono-load.avg_1_min', 'sprout-net.in_packets_sec', 'bono-io.write_req_sec', 'ralf-load.avg_5_min', 'ralf-net.in_errors_sec', 'bono-disk.inode_used_perc', 'homestead-io.write_time_sec', 'ellis-net.out_packets_sec', 'sprout-disk.space_used_perc', 'ralf-io.read_kbytes_sec', 'homestead-cpu.system_perc', 'sprout-mem.free_mb', 'homer-net.in_errors_sec', 'homestead-net.out_errors_sec', 'homer-io.write_req_sec', 'sprout-net.in_errors_sec', 
'ellis-disk.space_used_perc', 'sprout-net.out_packets_sec', 'sprout-net.in_packets_dropped_sec', 'ralf-cpu.system_perc', 'ralf-mem.free_mb', 'bono-io.read_req_sec', 'bono-net.in_packets_sec', 'homestead-cpu.wait_perc', 'ellis-load.avg_1_min'])"
109 "execution_count": null,
115 "dframesorted = dframe.sort_index(axis=1, ascending=True, inplace=False, kind='quicksort')"
120 "execution_count": null,
123 "base_uri": "https://localhost:8080/",
126 "id": "76XHMM3cfWGW",
127 "outputId": "87965d6f-7c2a-4c39-ed9e-f372a0ade7ca"
131 "dframesorted.head()"
136 "execution_count": null,
142 "#dframesorted.describe()"
147 "execution_count": null,
150 "base_uri": "https://localhost:8080/"
152 "id": "vnThYldxXdaw",
153 "outputId": "696f7fee-55eb-4d38-d471-17d1dd4e2e7e"
157 "# Print the column names so that the metrics of each server can be selected later.\n",
158 "print('Column names are: ',list(dframesorted.columns))"
162 "cell_type": "markdown",
167 "# **X_126bis / all data minus 30 dropped features, saved in CSV format**\n",
172 "net.in_errors_sec\n",
174 "net.in_packets_dropped_sec\n",
181 "execution_count": null,
184 "base_uri": "https://localhost:8080/"
186 "id": "HE4xoUdNXgLi",
187 "outputId": "16443f8c-111b-4023-d192-345e02c5a49f"
191 "X_126bis = dframesorted.drop(['sprout-cpu.stolen_perc', 'sprout-mem.total_mb', 'sprout-net.in_errors_sec', 'sprout-net.in_packets_dropped_sec', 'sprout-net.out_errors_sec','homer-cpu.stolen_perc', 'homer-mem.total_mb', 'homer-net.in_errors_sec', 'homer-net.in_packets_dropped_sec', 'homer-net.out_errors_sec','ellis-cpu.stolen_perc', 'ellis-mem.total_mb', 'ellis-net.in_errors_sec', 'ellis-net.in_packets_dropped_sec', 'ellis-net.out_errors_sec', 'bono-cpu.stolen_perc', 'bono-mem.total_mb', 'bono-net.in_errors_sec', 'bono-net.in_packets_dropped_sec', 'bono-net.out_errors_sec', 'ralf-cpu.stolen_perc', 'ralf-mem.total_mb', 'ralf-net.in_errors_sec', 'ralf-net.in_packets_dropped_sec', 'ralf-net.out_errors_sec', 'homestead-cpu.stolen_perc', 'homestead-mem.total_mb', 'homestead-net.in_errors_sec', 'homestead-net.in_packets_dropped_sec', 'homestead-net.out_errors_sec'], axis =1)\n",
193 "dframesorted.shape, X_126bis.shape"
198 "execution_count": null,
204 "X_126bis.to_csv('X_126bis.csv', sep=',')"
208 "cell_type": "markdown",
213 "# **df_Ellis.csv / extract the ellis server metrics**\n",
214 "1) Ellis Server with 26 metrics\n",
216 "2) subselection of the Ellis metrics"
221 "execution_count": null,
227 "# This function selects the given columns from a DataFrame and returns them as a new DataFrame\n",
228 "def select_columns(data_frame, column_names):\n",
229 " new_frame = data_frame.loc[:, column_names]\n",
230 " return new_frame\n",
232 "selected_columns = ['ellis-cpu.idle_perc', 'ellis-cpu.stolen_perc', 'ellis-cpu.system_perc', 'ellis-cpu.wait_perc', 'ellis-disk.inode_used_perc', 'ellis-disk.space_used_perc', 'ellis-io.read_kbytes_sec', 'ellis-io.read_req_sec', 'ellis-io.read_time_sec', 'ellis-io.write_kbytes_sec', 'ellis-io.write_req_sec', 'ellis-io.write_time_sec', 'ellis-load.avg_15_min', 'ellis-load.avg_1_min', 'ellis-load.avg_5_min', 'ellis-mem.free_mb', 'ellis-mem.total_mb', 'ellis-mem.usable_mb', 'ellis-mem.usable_perc', 'ellis-net.in_bytes_sec', 'ellis-net.in_errors_sec', 'ellis-net.in_packets_dropped_sec', 'ellis-net.in_packets_sec', 'ellis-net.out_bytes_sec', 'ellis-net.out_errors_sec', 'ellis-net.out_packets_sec']\n",
233 "df_Ellis = select_columns(dframesorted, selected_columns)"
238 "execution_count": null,
241 "base_uri": "https://localhost:8080/",
244 "id": "VU27zosAYA0b",
245 "outputId": "32ed0bbb-0386-40c8-ba57-0787d3afda76"
253 "cell_type": "markdown",
258 "# **df_Ellis_7 / focus on the main 6 metrics**"
263 "execution_count": null,
269 "selected_columns= ['ellis-load.avg_1_min', 'ellis-cpu.wait_perc', 'ellis-net.out_packets_sec', 'ellis-cpu.system_perc', 'ellis-net.in_bytes_sec', 'ellis-mem.free_mb']\n",
270 "df_Ellis_7 = select_columns(df_Ellis, selected_columns)"
275 "execution_count": null,
278 "base_uri": "https://localhost:8080/",
281 "id": "7y_uHZ5TYJiD",
282 "outputId": "5aef0247-67d8-4d19-c337-54075e096e07"
286 "df_Ellis_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/New/df_Ellis.csv')\n",
292 "execution_count": null,
295 "base_uri": "https://localhost:8080/"
297 "id": "e9c4-7mZYLpq",
298 "outputId": "020a0e5f-a63a-4918-db1b-a9e1081ce38e"
307 "execution_count": null,
310 "base_uri": "https://localhost:8080/",
313 "id": "iFy_Het0cqh7",
314 "outputId": "ad2ea7ad-1c70-4539-a558-bee07b0a8a19"
318 "timestamp = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/timestamp.csv\")\n",
324 "execution_count": null,
330 "df1 = timestamp[\"Timestamp\"]\n",
332 "df1.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/TimestampNew.csv')"
337 "execution_count": null,
340 "base_uri": "https://localhost:8080/"
342 "id": "dcpx8F6ReZzb",
343 "outputId": "7009dff8-ee1a-43ee-b990-1bfba586cce9"
352 "execution_count": null,
358 "#df_Ellis_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/New/df_EllisTime.csv') "
363 "execution_count": null,
366 "base_uri": "https://localhost:8080/",
369 "id": "2TddekZAfiad",
370 "outputId": "be25c51a-d1c0-4da8-9838-72fb59d85369"
379 "execution_count": null,
385 "# df_Ellis_7.describe()"
390 "execution_count": null,
396 "# investigate why we need this float transformation. \n",
397 "df_Ellis_7 = df_Ellis_7.astype(np.float)"
402 "execution_count": null,
405 "base_uri": "https://localhost:8080/",
408 "id": "mWxDJNMwtGYs",
409 "outputId": "73a4a63a-4bd6-4ef0-b140-4ef299152fa2"
413 "# Plot a histogram of every metric in df_Ellis_7\n",
414 "df_Ellis_7.hist(bins=100,figsize=(20,15))\n",
415 "#save_fig(\"attribute_histogram_plots\")\n",
421 "execution_count": null,
427 "df_Ellis_7.to_csv('df_Ellis_7.csv', sep=';')"
432 "execution_count": null,
435 "base_uri": "https://localhost:8080/",
438 "id": "sObmuXeWtLL0",
439 "outputId": "e2bbea48-8fb3-4671-a4dc-5f9f2e464b59"
443 "# we show here the boxplot\n",
444 "plt.figure(figsize=(20,20))\n",
445 "#df_Ellis_7.boxplot(figsize=(20,20))\n",
446 "ax = sns.boxplot(x=\"variable\", y=\"value\", data=pd.melt(df_Ellis_7))"
451 "execution_count": null,
457 "# the gray related metrics will be dropped when using the df_Ellis\n",
459 "#del df_Ellis['ellis-cpu.stolen_perc']\n",
460 "#del df_Ellis['ellis-mem.total_mb']\n",
461 "#del df_Ellis['ellis-net.in_errors_sec']\n",
462 "#del df_Ellis['ellis-net.in_packets_dropped_sec']\n",
463 "#del df_Ellis['ellis-net.out_errors_sec']"
468 "execution_count": null,
471 "base_uri": "https://localhost:8080/",
474 "id": "pcmqBw0gtUT5",
475 "outputId": "b0da529e-8895-483a-d7d1-e450fc6762e0"
479 "# Compute the correlation matrix of the df_Ellis_7 metrics\n",
480 "correaltionMatrice = df_Ellis_7.corr()\n",
481 "f, ax = plt.subplots(figsize=(30, 20))\n",
482 "sns.heatmap(correaltionMatrice, cbar=True, vmin=0, vmax=1, square=True, annot=True);\n",
488 "execution_count": null,
491 "base_uri": "https://localhost:8080/",
494 "id": "7yZNvlQ2tWlu",
495 "outputId": "dc56cd65-e5e5-4179-aac9-6c6aac43c0eb"
499 "mask = np.zeros_like(correaltionMatrice)\n",
500 "mask[np.triu_indices_from(mask)] = True\n",
501 "with sns.axes_style(\"white\"):\n",
502 " ax = sns.heatmap(correaltionMatrice, mask=mask, vmin=0,vmax=1, square=True)\n",
508 "execution_count": null,
511 "base_uri": "https://localhost:8080/",
514 "id": "Whxt9FahtZ6i",
515 "outputId": "f6dd23c3-82b5-4dd1-9f18-6bd373ff9322"
519 "df_Ellis_7.shape\n",
525 "execution_count": null,
528 "base_uri": "https://localhost:8080/",
531 "id": "Tmzw5MYctb3h",
532 "outputId": "65c21482-29fa-42e3-eb8f-50414bb9c656"
536 "# we show here the scatter_matrix\n",
537 "from pandas.plotting import scatter_matrix\n",
538 "scatter_matrix(df_Ellis_7, alpha=0.2, figsize=(30,30))"
543 "execution_count": null,
546 "base_uri": "https://localhost:8080/",
549 "id": "kcMuENePteVx",
550 "outputId": "7ed8fd61-a02c-4a01-f661-69938c0028ad"
554 "# we show here the scatter_matrix (kde)\n",
556 "scatter_matrix(df_Ellis_7, alpha=0.2, figsize=(30, 30), diagonal='kde')"
561 "execution_count": null,
564 "base_uri": "https://localhost:8080/",
567 "id": "thrxP0LQth88",
568 "outputId": "d15b9169-79b7-47a0-e19f-b8477dd2b287"
572 "#scatterplot the most obvious variable related to SalePrice\n",
573 "sns.pairplot(df_Ellis_7, size = 2.5)\n",
578 "cell_type": "markdown",
588 "execution_count": null,
594 "# Same select_columns helper as defined in the Ellis section: selects the given columns from a DataFrame and returns them as a new DataFrame\n",
595 "def select_columns(data_frame, column_names):\n",
596 " new_frame = data_frame.loc[:, column_names]\n",
597 " return new_frame\n",
599 "selected_columns = ['bono-cpu.idle_perc', 'bono-cpu.stolen_perc', 'bono-cpu.system_perc', 'bono-cpu.wait_perc', 'bono-disk.inode_used_perc', 'bono-disk.space_used_perc', 'bono-io.read_kbytes_sec', 'bono-io.read_req_sec', 'bono-io.read_time_sec', 'bono-io.write_kbytes_sec', 'bono-io.write_req_sec', 'bono-io.write_time_sec', 'bono-load.avg_15_min', 'bono-load.avg_1_min', 'bono-load.avg_5_min', 'bono-mem.free_mb', 'bono-mem.total_mb', 'bono-mem.usable_mb', 'bono-mem.usable_perc', 'bono-net.in_bytes_sec', 'bono-net.in_errors_sec', 'bono-net.in_packets_dropped_sec', 'bono-net.in_packets_sec', 'bono-net.out_bytes_sec', 'bono-net.out_errors_sec', 'bono-net.out_packets_sec']\n",
600 "df_Bono = select_columns(dframesorted, selected_columns)"
605 "execution_count": null,
608 "base_uri": "https://localhost:8080/",
611 "id": "EKi0wnvUIlZb",
612 "outputId": "4e7d2685-b09c-43ae-9217-d7210a253950"
621 "execution_count": null,
627 "selected_columns= ['bono-load.avg_1_min', 'bono-cpu.wait_perc', 'bono-net.out_packets_sec', 'bono-cpu.system_perc', 'bono-net.in_bytes_sec', 'bono-mem.free_mb']\n",
628 "df_Bono_7 = select_columns(df_Bono, selected_columns)"
633 "execution_count": null,
636 "base_uri": "https://localhost:8080/",
639 "id": "9ZyhiI6AJo1Y",
640 "outputId": "9308469b-ba98-4152-dafa-308d3b6f7b04"
644 "df_Bono_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/bono/df_Bono.csv')\n",
650 "execution_count": null,
653 "base_uri": "https://localhost:8080/"
655 "id": "4ZUXUW35J2fr",
656 "outputId": "fc474730-e953-4f09-8633-ece4e493d9de"
664 "cell_type": "markdown",
674 "execution_count": null,
680 "# Same select_columns helper as defined in the Ellis section: selects the given columns from a DataFrame and returns them as a new DataFrame\n",
681 "def select_columns(data_frame, column_names):\n",
682 " new_frame = data_frame.loc[:, column_names]\n",
683 " return new_frame\n",
685 "selected_columns = ['sprout-cpu.idle_perc', 'sprout-cpu.stolen_perc', 'sprout-cpu.system_perc', 'sprout-cpu.wait_perc', 'sprout-disk.inode_used_perc', 'sprout-disk.space_used_perc', 'sprout-io.read_kbytes_sec', 'sprout-io.read_req_sec', 'sprout-io.read_time_sec', 'sprout-io.write_kbytes_sec', 'sprout-io.write_req_sec', 'sprout-io.write_time_sec', 'sprout-load.avg_15_min', 'sprout-load.avg_1_min', 'sprout-load.avg_5_min', 'sprout-mem.free_mb', 'sprout-mem.total_mb', 'sprout-mem.usable_mb', 'sprout-mem.usable_perc', 'sprout-net.in_bytes_sec', 'sprout-net.in_errors_sec', 'sprout-net.in_packets_dropped_sec', 'sprout-net.in_packets_sec', 'sprout-net.out_bytes_sec', 'sprout-net.out_errors_sec', 'sprout-net.out_packets_sec']\n",
686 "df_Sprout = select_columns(dframesorted, selected_columns)"
691 "execution_count": null,
694 "base_uri": "https://localhost:8080/",
697 "id": "wjdR0R7YMOyT",
698 "outputId": "4600cd66-0456-4da2-8723-17b5079f7f6b"
707 "execution_count": null,
713 "selected_columns= ['sprout-load.avg_1_min', 'sprout-cpu.wait_perc', 'sprout-net.out_packets_sec', 'sprout-cpu.system_perc', 'sprout-net.in_bytes_sec', 'sprout-mem.free_mb']\n",
714 "df_Sprout_7 = select_columns(df_Sprout, selected_columns)"
719 "execution_count": null,
722 "base_uri": "https://localhost:8080/",
725 "id": "fP5NM3VjM0Uw",
726 "outputId": "163390b3-9ce0-406a-d9c9-88687d696c66"
730 "df_Sprout_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/sprout/df_Sprout.csv')\n",
736 "execution_count": null,
739 "base_uri": "https://localhost:8080/"
741 "id": "Ybl5WrSYM9Q0",
742 "outputId": "50f23729-a029-440f-dc6f-828308bb342c"
750 "cell_type": "markdown",
760 "execution_count": null,
766 "# Same select_columns helper as defined in the Ellis section: selects the given columns from a DataFrame and returns them as a new DataFrame\n",
767 "def select_columns(data_frame, column_names):\n",
768 " new_frame = data_frame.loc[:, column_names]\n",
769 " return new_frame\n",
771 "selected_columns = ['homestead-cpu.idle_perc', 'homestead-cpu.stolen_perc', 'homestead-cpu.system_perc', 'homestead-cpu.wait_perc', 'homestead-disk.inode_used_perc', 'homestead-disk.space_used_perc', 'homestead-io.read_kbytes_sec', 'homestead-io.read_req_sec', 'homestead-io.read_time_sec', 'homestead-io.write_kbytes_sec', 'homestead-io.write_req_sec', 'homestead-io.write_time_sec', 'homestead-load.avg_15_min', 'homestead-load.avg_1_min', 'homestead-load.avg_5_min', 'homestead-mem.free_mb', 'homestead-mem.total_mb', 'homestead-mem.usable_mb', 'homestead-mem.usable_perc', 'homestead-net.in_bytes_sec', 'homestead-net.in_errors_sec', 'homestead-net.in_packets_dropped_sec', 'homestead-net.in_packets_sec', 'homestead-net.out_bytes_sec', 'homestead-net.out_errors_sec', 'homestead-net.out_packets_sec']\n",
772 "df_Homestead = select_columns(dframesorted, selected_columns)"
777 "execution_count": null,
780 "base_uri": "https://localhost:8080/",
783 "id": "Qc5vd1CVNas2",
784 "outputId": "8b6ab0c1-d15d-4545-e69b-dd39facc5915"
788 "df_Homestead.head()"
793 "execution_count": null,
799 "selected_columns= ['homestead-load.avg_1_min', 'homestead-cpu.wait_perc', 'homestead-net.out_packets_sec', 'homestead-cpu.system_perc', 'homestead-net.in_bytes_sec', 'homestead-mem.free_mb']\n",
800 "df_Homestead_7 = select_columns(df_Homestead, selected_columns)"
805 "execution_count": null,
808 "base_uri": "https://localhost:8080/",
811 "id": "03Y_KhJVNuOC",
812 "outputId": "ab3eef43-6a18-4c61-926a-43afac19d7a5"
816 "df_Homestead_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/homestead/df_Homestead.csv')\n",
817 "df_Homestead_7.head()"
822 "execution_count": null,
825 "base_uri": "https://localhost:8080/"
827 "id": "CVdMWcneN7ix",
828 "outputId": "f3e00de9-f60d-44d0-ae5b-9c4eda033687"
832 "df_Homestead_7.info()"
836 "cell_type": "markdown",
846 "execution_count": null,
852 "# Same select_columns helper as defined in the Ellis section: selects the given columns from a DataFrame and returns them as a new DataFrame\n",
853 "def select_columns(data_frame, column_names):\n",
854 " new_frame = data_frame.loc[:, column_names]\n",
855 " return new_frame\n",
857 "selected_columns = ['ralf-cpu.idle_perc', 'ralf-cpu.stolen_perc', 'ralf-cpu.system_perc', 'ralf-cpu.wait_perc', 'ralf-disk.inode_used_perc', 'ralf-disk.space_used_perc', 'ralf-io.read_kbytes_sec', 'ralf-io.read_req_sec', 'ralf-io.read_time_sec', 'ralf-io.write_kbytes_sec', 'ralf-io.write_req_sec', 'ralf-io.write_time_sec', 'ralf-load.avg_15_min', 'ralf-load.avg_1_min', 'ralf-load.avg_5_min', 'ralf-mem.free_mb', 'ralf-mem.total_mb', 'ralf-mem.usable_mb', 'ralf-mem.usable_perc', 'ralf-net.in_bytes_sec', 'ralf-net.in_errors_sec', 'ralf-net.in_packets_dropped_sec', 'ralf-net.in_packets_sec', 'ralf-net.out_bytes_sec', 'ralf-net.out_errors_sec', 'ralf-net.out_packets_sec']\n",
858 "df_Ralf = select_columns(dframesorted, selected_columns)"
863 "execution_count": null,
866 "base_uri": "https://localhost:8080/",
869 "id": "XlYi3QBlORiI",
870 "outputId": "6a4740f0-b2c8-4e48-9067-c17c7a267b0d"
879 "execution_count": null,
885 "selected_columns= ['ralf-load.avg_1_min', 'ralf-cpu.wait_perc', 'ralf-net.out_packets_sec', 'ralf-cpu.system_perc', 'ralf-net.in_bytes_sec', 'ralf-mem.free_mb']\n",
886 "df_Ralf_7 = select_columns(df_Ralf, selected_columns)"
891 "execution_count": null,
894 "base_uri": "https://localhost:8080/",
897 "id": "u3v26LoiOi5L",
898 "outputId": "441d54ec-a644-446e-dc5b-e7ad5ec58456"
902 "df_Ralf_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/ralf/df_Ralf.csv')\n",
908 "execution_count": null,
911 "base_uri": "https://localhost:8080/"
913 "id": "mR7Fg1w8OtnU",
914 "outputId": "141c6b09-3c69-45cc-e723-728e1f0e8e68"
922 "cell_type": "markdown",
932 "execution_count": null,
938 "# Same select_columns helper as defined in the Ellis section: selects the given columns from a DataFrame and returns them as a new DataFrame\n",
939 "def select_columns(data_frame, column_names):\n",
940 " new_frame = data_frame.loc[:, column_names]\n",
941 " return new_frame\n",
943 "selected_columns = ['homer-cpu.idle_perc', 'homer-cpu.stolen_perc', 'homer-cpu.system_perc', 'homer-cpu.wait_perc', 'homer-disk.inode_used_perc', 'homer-disk.space_used_perc', 'homer-io.read_kbytes_sec', 'homer-io.read_req_sec', 'homer-io.read_time_sec', 'homer-io.write_kbytes_sec', 'homer-io.write_req_sec', 'homer-io.write_time_sec', 'homer-load.avg_15_min', 'homer-load.avg_1_min', 'homer-load.avg_5_min', 'homer-mem.free_mb', 'homer-mem.total_mb', 'homer-mem.usable_mb', 'homer-mem.usable_perc', 'homer-net.in_bytes_sec', 'homer-net.in_errors_sec', 'homer-net.in_packets_dropped_sec', 'homer-net.in_packets_sec', 'homer-net.out_bytes_sec', 'homer-net.out_errors_sec', 'homer-net.out_packets_sec']\n",
944 "df_Homer = select_columns(dframesorted, selected_columns)"
949 "execution_count": null,
952 "base_uri": "https://localhost:8080/",
955 "id": "cwMKYQWUPKpl",
956 "outputId": "ca108468-0c41-4f44-aef0-8c63239c9fd5"
965 "execution_count": null,
971 "selected_columns= ['homer-load.avg_1_min', 'homer-cpu.wait_perc', 'homer-net.out_packets_sec', 'homer-cpu.system_perc', 'homer-net.in_bytes_sec', 'homer-mem.free_mb']\n",
972 "df_Homer_7 = select_columns(df_Homer, selected_columns)"
977 "execution_count": null,
980 "base_uri": "https://localhost:8080/",
983 "id": "VZw7WV7tPd9i",
984 "outputId": "39dc88dd-eb89-45b0-a0d3-f261aab6bbd1"
988 "df_Homer_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/homer/df_Homer.csv')\n",
994 "execution_count": null,
997 "base_uri": "https://localhost:8080/"
999 "id": "E1Cqq8V3PnEv",
1000 "outputId": "9a80be17-5bb5-4f8d-a61c-d3206e730309"
1010 "name": "vIMS_Visualization.ipynb",
1014 "display_name": "Python 3 (ipykernel)",
1015 "language": "python",
1019 "codemirror_mode": {
1023 "file_extension": ".py",
1024 "mimetype": "text/x-python",
1026 "nbconvert_exporter": "python",
1027 "pygments_lexer": "ipython3",