models/failure_prediction/jnotebooks/vIMS_Visualization.ipynb

   1 {
   2  "cells": [
   3   {
   4    "cell_type": "markdown",
   5    "metadata": {
   6     "id": "pRQZOrAplLuo"
   7    },
   8    "source": [
   9     "Contributors: **Rohit Singh Rathaur, Girish L.** \n",
  10     "\n",
  11     "Copyright [2021](2021) [*Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka*]\n",
  12     "\n",
  13     "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
  14     "you may not use this file except in compliance with the License.\n",
  15     "You may obtain a copy of the License at\n",
  16     "\n",
  17     "    http://www.apache.org/licenses/LICENSE-2.0\n",
  18     "\n",
  19     "Unless required by applicable law or agreed to in writing, software\n",
  20     "distributed under the License is distributed on an \"AS IS\" BASIS,\n",
  21     "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
  22     "See the License for the specific language governing permissions and\n",
  23     "limitations under the License."
  24    ]
  25   },
  26   {
  27    "cell_type": "code",
  28    "execution_count": null,
  29    "metadata": {
  30     "id": "6rUjno0va6DX"
  31    },
  32    "outputs": [],
  33    "source": [
  34     "# import the necessary libraries\n",
  35     "\n",
  36     "import numpy as np # linear algebra\n",
  37     "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
  38     "\n",
  39     "\n",
  40     "# To plot pretty figures\n",
  41     "%matplotlib inline\n",
  42     "import matplotlib.pyplot as plt\n",
  43     "plt.rcParams['axes.labelsize'] = 14\n",
  44     "plt.rcParams['xtick.labelsize'] = 12\n",
  45     "plt.rcParams['ytick.labelsize'] = 12\n",
  46     "\n",
  47     "\n",
  48     "import seaborn as sns\n",
  49     "color = sns.color_palette()\n",
  50     "sns.set_style('darkgrid')\n",
  51     "\n",
  52     "import warnings\n",
  53     "def ignore_warn(*args, **kwargs):\n",
  54     "    pass\n",
  55     "warnings.warn = ignore_warn #ignore annoying warning (from sklearn and seaborn)\n",
  56     "\n",
  57     "\n",
  58     "from scipy import stats\n",
  59     "from scipy.stats import norm, skew #for some statistics\n",
  60     "\n",
  61     "\n",
  62     "pd.set_option('display.float_format', lambda x: '{:.3f}'.format(x)) #Limiting floats output to 3 decimal points\n",
  63     "\n",
  64     "\n",
  65     "from subprocess import check_output\n",
  66     "#print(check_output([\"ls\", \"../input\"]).decode(\"utf8\")) #check the files available in the directory"
  67    ]
  68   },
  69   {
  70    "cell_type": "markdown",
  71    "metadata": {
  72     "id": "K4CRikWVbT1d"
  73    },
  74    "source": [
  75     "# **X.npy data /all data stored in the npy format**"
  76    ]
  77   },
  78   {
  79    "cell_type": "code",
  80    "execution_count": null,
  81    "metadata": {
  82     "colab": {
  83      "base_uri": "https://localhost:8080/"
  84     },
  85     "id": "geh5BNM3bhmT",
  86     "outputId": "0f5af44d-de19-438b-e4de-5f703c59a687"
  87    },
  88    "outputs": [],
  89    "source": [
  90     "from google.colab import drive\n",
  91     "drive.mount('/gdrive')"
  92    ]
  93   },
  94   {
  95    "cell_type": "code",
  96    "execution_count": null,
  97    "metadata": {
  98     "id": "PanwhFGBbDV7"
  99    },
 100    "outputs": [],
 101    "source": [
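 102     "# Load the full dataset into a DataFrame with one named column per feature (the columns are sorted in the next cell). NOTE: allow_pickle=True unpickles arbitrary objects, so only load X.npy from a trusted source.\n",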
 103     "X = np.load('/gdrive/MyDrive/LFN Anuket/Analysis/data/X.npy', allow_pickle=True)\n",
 104     "dframe = pd.DataFrame(data=X,columns=['ellis-cpu.idle_perc', 'ralf-load.avg_15_min', 'bono-net.in_errors_sec', 'homer-net.out_bytes_sec', 'ellis-io.write_req_sec', 'homer-mem.total_mb', 'homestead-load.avg_1_min', 'homer-load.avg_1_min', 'sprout-cpu.stolen_perc', 'ralf-cpu.idle_perc', 'sprout-io.read_req_sec', 'homestead-net.in_bytes_sec', 'homer-disk.space_used_perc', 'bono-net.out_packets_sec', 'homer-cpu.wait_perc', 'ellis-net.in_packets_sec', 'bono-mem.free_mb', 'ellis-io.read_req_sec', 'bono-mem.usable_mb', 'bono-net.in_packets_dropped_sec', 'homestead-mem.free_mb', 'homer-io.write_time_sec', 'sprout-io.write_time_sec', 'homestead-net.in_errors_sec', 'homestead-mem.usable_perc', 'homestead-net.in_packets_dropped_sec', 'homestead-io.write_req_sec', 'bono-net.in_bytes_sec', 'homestead-disk.space_used_perc', 'homer-net.out_packets_sec', 'bono-mem.usable_perc', 'ralf-net.out_errors_sec', 'homestead-load.avg_5_min', 'sprout-io.read_kbytes_sec', 'sprout-net.out_errors_sec', 'homestead-io.write_kbytes_sec', 'homestead-net.in_packets_sec', 'sprout-mem.usable_mb', 'homestead-cpu.idle_perc', 'ralf-io.write_time_sec', 'ralf-io.write_kbytes_sec', 'ralf-io.write_req_sec', 'ellis-net.out_bytes_sec', 'bono-io.read_kbytes_sec', 'bono-disk.space_used_perc', 'homer-net.in_packets_dropped_sec', 'ralf-mem.usable_mb', 'bono-load.avg_15_min', 'bono-io.read_time_sec', 'sprout-mem.usable_perc', 'bono-cpu.idle_perc', 'homer-mem.usable_perc', 'homestead-cpu.stolen_perc', 'ralf-io.read_req_sec', 'homer-cpu.idle_perc', 'homestead-mem.total_mb', 'ralf-load.avg_1_min', 'homer-io.read_kbytes_sec', 'homestead-io.read_req_sec', 'ellis-mem.free_mb', 'bono-io.write_time_sec', 'ellis-net.out_errors_sec', 'ellis-cpu.stolen_perc', 'ellis-mem.usable_perc', 'ralf-disk.inode_used_perc', 'sprout-load.avg_15_min', 'ellis-io.read_time_sec', 'ralf-net.out_packets_sec', 'sprout-io.write_req_sec', 'bono-cpu.stolen_perc', 'homestead-load.avg_15_min', 'bono-cpu.system_perc', 
'homestead-net.out_packets_sec', 'ellis-io.write_kbytes_sec', 'sprout-cpu.idle_perc', 'ellis-mem.total_mb', 'homer-mem.usable_mb', 'bono-load.avg_5_min', 'ellis-load.avg_5_min', 'homer-cpu.stolen_perc', 'sprout-net.out_bytes_sec', 'homestead-mem.usable_mb', 'homestead-disk.inode_used_perc', 'ralf-net.in_packets_dropped_sec', 'sprout-io.write_kbytes_sec', 'ellis-load.avg_15_min', 'homer-load.avg_5_min', 'ralf-mem.usable_perc', 'bono-net.out_bytes_sec', 'ellis-cpu.system_perc', 'homer-io.read_time_sec', 'ellis-disk.inode_used_perc', 'homestead-io.read_time_sec', 'sprout-net.in_bytes_sec', 'bono-io.write_kbytes_sec', 'homestead-io.read_kbytes_sec', 'ellis-net.in_errors_sec', 'sprout-io.read_time_sec', 'homer-disk.inode_used_perc', 'ralf-cpu.wait_perc', 'homer-load.avg_15_min', 'sprout-load.avg_5_min', 'homer-io.read_req_sec', 'ralf-mem.total_mb', 'homer-mem.free_mb', 'homer-net.in_packets_sec', 'homestead-net.out_bytes_sec', 'sprout-disk.inode_used_perc', 'ellis-mem.usable_mb', 'homer-io.write_kbytes_sec', 'homer-net.out_errors_sec', 'homer-cpu.system_perc', 'ellis-io.read_kbytes_sec', 'sprout-load.avg_1_min', 'sprout-cpu.system_perc', 'ralf-cpu.stolen_perc', 'bono-mem.total_mb', 'bono-net.out_errors_sec', 'ellis-io.write_time_sec', 'ralf-io.read_time_sec', 'sprout-cpu.wait_perc', 'ellis-cpu.wait_perc', 'ralf-disk.space_used_perc', 'ralf-net.out_bytes_sec', 'ellis-net.in_packets_dropped_sec', 'homer-net.in_bytes_sec', 'ellis-net.in_bytes_sec', 'bono-cpu.wait_perc', 'ralf-net.in_packets_sec', 'sprout-mem.total_mb', 'ralf-net.in_bytes_sec', 'bono-load.avg_1_min', 'sprout-net.in_packets_sec', 'bono-io.write_req_sec', 'ralf-load.avg_5_min', 'ralf-net.in_errors_sec', 'bono-disk.inode_used_perc', 'homestead-io.write_time_sec', 'ellis-net.out_packets_sec', 'sprout-disk.space_used_perc', 'ralf-io.read_kbytes_sec', 'homestead-cpu.system_perc', 'sprout-mem.free_mb', 'homer-net.in_errors_sec', 'homestead-net.out_errors_sec', 'homer-io.write_req_sec', 'sprout-net.in_errors_sec', 
'ellis-disk.space_used_perc', 'sprout-net.out_packets_sec', 'sprout-net.in_packets_dropped_sec', 'ralf-cpu.system_perc', 'ralf-mem.free_mb', 'bono-io.read_req_sec', 'bono-net.in_packets_sec', 'homestead-cpu.wait_perc', 'ellis-load.avg_1_min'])"
 105    ]
 106   },
 107   {
 108    "cell_type": "code",
 109    "execution_count": null,
 110    "metadata": {
 111     "id": "nAia0RRkbcSV"
 112    },
 113    "outputs": [],
 114    "source": [
 115     "dframesorted = dframe.sort_index(axis=1, ascending=True, inplace=False, kind='quicksort')"
 116    ]
 117   },
 118   {
 119    "cell_type": "code",
 120    "execution_count": null,
 121    "metadata": {
 122     "colab": {
 123      "base_uri": "https://localhost:8080/",
 124      "height": 270
 125     },
 126     "id": "76XHMM3cfWGW",
 127     "outputId": "87965d6f-7c2a-4c39-ed9e-f372a0ade7ca"
 128    },
 129    "outputs": [],
 130    "source": [
 131     "dframesorted.head()"
 132    ]
 133   },
 134   {
 135    "cell_type": "code",
 136    "execution_count": null,
 137    "metadata": {
 138     "id": "lKCAS0m7fe5F"
 139    },
 140    "outputs": [],
 141    "source": [
 142     "#dframesorted.describe()"
 143    ]
 144   },
 145   {
 146    "cell_type": "code",
 147    "execution_count": null,
 148    "metadata": {
 149     "colab": {
 150      "base_uri": "https://localhost:8080/"
 151     },
 152     "id": "vnThYldxXdaw",
 153     "outputId": "696f7fee-55eb-4d38-d471-17d1dd4e2e7e"
 154    },
 155    "outputs": [],
 156    "source": [
 157     "# Print the column names so that the per-server metrics can be selected later.\n",
 158     "print('Column names are: ',list(dframesorted.columns))"
 159    ]
 160   },
 161   {
 162    "cell_type": "markdown",
 163    "metadata": {
 164     "id": "8-pffUQBXojo"
 165    },
 166    "source": [
 167     "# **X_126bis / all data minus 30 features (the 5 metrics below for each of the 6 servers), saved in CSV format**\n",
 168     "cpu.stolen_perc\n",
 169     "\n",
 170     "mem.total_mb\n",
 171     "\n",
 172     "net.in_errors_sec\n",
 173     "\n",
 174     "net.in_packets_dropped_sec\n",
 175     "\n",
 176     "net.out_errors_sec"
 177    ]
 178   },
 179   {
 180    "cell_type": "code",
 181    "execution_count": null,
 182    "metadata": {
 183     "colab": {
 184      "base_uri": "https://localhost:8080/"
 185     },
 186     "id": "HE4xoUdNXgLi",
 187     "outputId": "16443f8c-111b-4023-d192-345e02c5a49f"
 188    },
 189    "outputs": [],
 190    "source": [
 191     "X_126bis = dframesorted.drop(['sprout-cpu.stolen_perc', 'sprout-mem.total_mb', 'sprout-net.in_errors_sec', 'sprout-net.in_packets_dropped_sec', 'sprout-net.out_errors_sec','homer-cpu.stolen_perc', 'homer-mem.total_mb', 'homer-net.in_errors_sec', 'homer-net.in_packets_dropped_sec', 'homer-net.out_errors_sec','ellis-cpu.stolen_perc', 'ellis-mem.total_mb', 'ellis-net.in_errors_sec', 'ellis-net.in_packets_dropped_sec', 'ellis-net.out_errors_sec', 'bono-cpu.stolen_perc', 'bono-mem.total_mb', 'bono-net.in_errors_sec', 'bono-net.in_packets_dropped_sec', 'bono-net.out_errors_sec', 'ralf-cpu.stolen_perc', 'ralf-mem.total_mb', 'ralf-net.in_errors_sec', 'ralf-net.in_packets_dropped_sec', 'ralf-net.out_errors_sec', 'homestead-cpu.stolen_perc', 'homestead-mem.total_mb', 'homestead-net.in_errors_sec', 'homestead-net.in_packets_dropped_sec', 'homestead-net.out_errors_sec'], axis =1)\n",
 192     "\n",
 193     "dframesorted.shape, X_126bis.shape"
 194    ]
 195   },
 196   {
 197    "cell_type": "code",
 198    "execution_count": null,
 199    "metadata": {
 200     "id": "5_8LHtazXvwf"
 201    },
 202    "outputs": [],
 203    "source": [
 204     "X_126bis.to_csv('X_126bis.csv', sep=',')"
 205    ]
 206   },
 207   {
 208    "cell_type": "markdown",
 209    "metadata": {
 210     "id": "sknAZgiPX6_2"
 211    },
 212    "source": [
 213     "# **df_Ellis.csv / extract the ellis server metrics**\n",
 214     "1) Ellis Server with 26 metrics\n",
 215     "\n",
 216     "2) subselection of the Ellis metrics"
 217    ]
 218   },
 219   {
 220    "cell_type": "code",
 221    "execution_count": null,
 222    "metadata": {
 223     "id": "S6n-hPD3X43P"
 224    },
 225    "outputs": [],
 226    "source": [
 227     "# this function select column from the global df and create a new df with them\n",
 228     "def select_columns(data_frame, column_names):\n",
 229     "    new_frame = data_frame.loc[:, column_names]\n",
 230     "    return new_frame\n",
 231     "\n",
 232     "selected_columns = ['ellis-cpu.idle_perc', 'ellis-cpu.stolen_perc', 'ellis-cpu.system_perc', 'ellis-cpu.wait_perc', 'ellis-disk.inode_used_perc', 'ellis-disk.space_used_perc', 'ellis-io.read_kbytes_sec', 'ellis-io.read_req_sec', 'ellis-io.read_time_sec', 'ellis-io.write_kbytes_sec', 'ellis-io.write_req_sec', 'ellis-io.write_time_sec', 'ellis-load.avg_15_min', 'ellis-load.avg_1_min', 'ellis-load.avg_5_min', 'ellis-mem.free_mb', 'ellis-mem.total_mb', 'ellis-mem.usable_mb', 'ellis-mem.usable_perc', 'ellis-net.in_bytes_sec', 'ellis-net.in_errors_sec', 'ellis-net.in_packets_dropped_sec', 'ellis-net.in_packets_sec', 'ellis-net.out_bytes_sec', 'ellis-net.out_errors_sec', 'ellis-net.out_packets_sec']\n",
 233     "df_Ellis = select_columns(dframesorted, selected_columns)"
 234    ]
 235   },
 236   {
 237    "cell_type": "code",
 238    "execution_count": null,
 239    "metadata": {
 240     "colab": {
 241      "base_uri": "https://localhost:8080/",
 242      "height": 241
 243     },
 244     "id": "VU27zosAYA0b",
 245     "outputId": "32ed0bbb-0386-40c8-ba57-0787d3afda76"
 246    },
 247    "outputs": [],
 248    "source": [
 249     "df_Ellis.head()"
 250    ]
 251   },
 252   {
 253    "cell_type": "markdown",
 254    "metadata": {
 255     "id": "0k96D8aDYFw-"
 256    },
 257    "source": [
 258     "# **df_Ellis_7 / focus on the main 6 metrics**"
 259    ]
 260   },
 261   {
 262    "cell_type": "code",
 263    "execution_count": null,
 264    "metadata": {
 265     "id": "zVxKNXtfYCph"
 266    },
 267    "outputs": [],
 268    "source": [
 269     "selected_columns= ['ellis-load.avg_1_min', 'ellis-cpu.wait_perc', 'ellis-net.out_packets_sec', 'ellis-cpu.system_perc', 'ellis-net.in_bytes_sec', 'ellis-mem.free_mb']\n",
 270     "df_Ellis_7 = select_columns(df_Ellis, selected_columns)"
 271    ]
 272   },
 273   {
 274    "cell_type": "code",
 275    "execution_count": null,
 276    "metadata": {
 277     "colab": {
 278      "base_uri": "https://localhost:8080/",
 279      "height": 204
 280     },
 281     "id": "7y_uHZ5TYJiD",
 282     "outputId": "5aef0247-67d8-4d19-c337-54075e096e07"
 283    },
 284    "outputs": [],
 285    "source": [
 286     "df_Ellis_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/New/df_Ellis.csv')\n",
 287     "df_Ellis_7.head()"
 288    ]
 289   },
 290   {
 291    "cell_type": "code",
 292    "execution_count": null,
 293    "metadata": {
 294     "colab": {
 295      "base_uri": "https://localhost:8080/"
 296     },
 297     "id": "e9c4-7mZYLpq",
 298     "outputId": "020a0e5f-a63a-4918-db1b-a9e1081ce38e"
 299    },
 300    "outputs": [],
 301    "source": [
 302     "df_Ellis_7.info()"
 303    ]
 304   },
 305   {
 306    "cell_type": "code",
 307    "execution_count": null,
 308    "metadata": {
 309     "colab": {
 310      "base_uri": "https://localhost:8080/",
 311      "height": 204
 312     },
 313     "id": "iFy_Het0cqh7",
 314     "outputId": "ad2ea7ad-1c70-4539-a558-bee07b0a8a19"
 315    },
 316    "outputs": [],
 317    "source": [
 318     "timestamp  = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/timestamp.csv\")\n",
 319     "timestamp.head()"
 320    ]
 321   },
 322   {
 323    "cell_type": "code",
 324    "execution_count": null,
 325    "metadata": {
 326     "id": "q2-Wt45vdTzt"
 327    },
 328    "outputs": [],
 329    "source": [
 330     "df1 = timestamp[\"Timestamp\"]\n",
 331     "df1\n",
 332     "df1.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/TimestampNew.csv')"
 333    ]
 334   },
 335   {
 336    "cell_type": "code",
 337    "execution_count": null,
 338    "metadata": {
 339     "colab": {
 340      "base_uri": "https://localhost:8080/"
 341     },
 342     "id": "dcpx8F6ReZzb",
 343     "outputId": "7009dff8-ee1a-43ee-b990-1bfba586cce9"
 344    },
 345    "outputs": [],
 346    "source": [
 347     "df1.head()"
 348    ]
 349   },
 350   {
 351    "cell_type": "code",
 352    "execution_count": null,
 353    "metadata": {
 354     "id": "mybijUDWfdcH"
 355    },
 356    "outputs": [],
 357    "source": [
 358     "#df_Ellis_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/New/df_EllisTime.csv') "
 359    ]
 360   },
 361   {
 362    "cell_type": "code",
 363    "execution_count": null,
 364    "metadata": {
 365     "colab": {
 366      "base_uri": "https://localhost:8080/",
 367      "height": 419
 368     },
 369     "id": "2TddekZAfiad",
 370     "outputId": "be25c51a-d1c0-4da8-9838-72fb59d85369"
 371    },
 372    "outputs": [],
 373    "source": [
 374     "df_Ellis_7"
 375    ]
 376   },
 377   {
 378    "cell_type": "code",
 379    "execution_count": null,
 380    "metadata": {
 381     "id": "7gstjLldYT_r"
 382    },
 383    "outputs": [],
 384    "source": [
 385     "# df_Ellis_7.describe()"
 386    ]
 387   },
 388   {
 389    "cell_type": "code",
 390    "execution_count": null,
 391    "metadata": {
 392     "id": "bFMkMFqhs5pm"
 393    },
 394    "outputs": [],
 395    "source": [
 396     "# investigate why we need this float transformation. \n",
 397     "df_Ellis_7 = df_Ellis_7.astype(np.float)"
 398    ]
 399   },
 400   {
 401    "cell_type": "code",
 402    "execution_count": null,
 403    "metadata": {
 404     "colab": {
 405      "base_uri": "https://localhost:8080/",
 406      "height": 882
 407     },
 408     "id": "mWxDJNMwtGYs",
 409     "outputId": "73a4a63a-4bd6-4ef0-b140-4ef299152fa2"
 410    },
 411    "outputs": [],
 412    "source": [
 413     "# we show here the hist\n",
 414     "df_Ellis_7.hist(bins=100,figsize=(20,15))\n",
 415     "#save_fig(\"attribute_histogram_plots\")\n",
 416     "plt.show()"
 417    ]
 418   },
 419   {
 420    "cell_type": "code",
 421    "execution_count": null,
 422    "metadata": {
 423     "id": "dedoBLq_tIG_"
 424    },
 425    "outputs": [],
 426    "source": [
 427     "df_Ellis_7.to_csv('df_Ellis_7.csv', sep=';')"
 428    ]
 429   },
 430   {
 431    "cell_type": "code",
 432    "execution_count": null,
 433    "metadata": {
 434     "colab": {
 435      "base_uri": "https://localhost:8080/",
 436      "height": 1000
 437     },
 438     "id": "sObmuXeWtLL0",
 439     "outputId": "e2bbea48-8fb3-4671-a4dc-5f9f2e464b59"
 440    },
 441    "outputs": [],
 442    "source": [
 443     "# we show here the boxplot\n",
 444     "plt.figure(figsize=(20,20))\n",
 445     "#df_Ellis_7.boxplot(figsize=(20,20))\n",
 446     "ax = sns.boxplot(x=\"variable\", y=\"value\", data=pd.melt(df_Ellis_7))"
 447    ]
 448   },
 449   {
 450    "cell_type": "code",
 451    "execution_count": null,
 452    "metadata": {
 453     "id": "wxNTLGMHtOF_"
 454    },
 455    "outputs": [],
 456    "source": [
 457     "# the gray related metrics will be dropped when using the df_Ellis\n",
 458     "\n",
 459     "#del df_Ellis['ellis-cpu.stolen_perc']\n",
 460     "#del df_Ellis['ellis-mem.total_mb']\n",
 461     "#del df_Ellis['ellis-net.in_errors_sec']\n",
 462     "#del df_Ellis['ellis-net.in_packets_dropped_sec']\n",
 463     "#del df_Ellis['ellis-net.out_errors_sec']"
 464    ]
 465   },
 466   {
 467    "cell_type": "code",
 468    "execution_count": null,
 469    "metadata": {
 470     "colab": {
 471      "base_uri": "https://localhost:8080/",
 472      "height": 1000
 473     },
 474     "id": "pcmqBw0gtUT5",
 475     "outputId": "b0da529e-8895-483a-d7d1-e450fc6762e0"
 476    },
 477    "outputs": [],
 478    "source": [
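 479     "# compute the correlation matrix of the selected Ellis metrics and plot it as an annotated heatmap (note: vmin=0 draws every negative correlation in the same colour as 0)\n",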
 480     "correaltionMatrice = df_Ellis_7.corr()\n",
 481     "f, ax = plt.subplots(figsize=(30, 20))\n",
 482     "sns.heatmap(correaltionMatrice, cbar=True, vmin=0, vmax=1, square=True, annot=True);\n",
 483     "plt.show()"
 484    ]
 485   },
 486   {
 487    "cell_type": "code",
 488    "execution_count": null,
 489    "metadata": {
 490     "colab": {
 491      "base_uri": "https://localhost:8080/",
 492      "height": 411
 493     },
 494     "id": "7yZNvlQ2tWlu",
 495     "outputId": "dc56cd65-e5e5-4179-aac9-6c6aac43c0eb"
 496    },
 497    "outputs": [],
 498    "source": [
 499     "mask = np.zeros_like(correaltionMatrice)\n",
 500     "mask[np.triu_indices_from(mask)] = True\n",
 501     "with sns.axes_style(\"white\"):\n",
 502     "     ax = sns.heatmap(correaltionMatrice, mask=mask, vmin=0,vmax=1, square=True)\n",
 503     "plt.show()"
 504    ]
 505   },
 506   {
 507    "cell_type": "code",
 508    "execution_count": null,
 509    "metadata": {
 510     "colab": {
 511      "base_uri": "https://localhost:8080/",
 512      "height": 204
 513     },
 514     "id": "Whxt9FahtZ6i",
 515     "outputId": "f6dd23c3-82b5-4dd1-9f18-6bd373ff9322"
 516    },
 517    "outputs": [],
 518    "source": [
 519     "df_Ellis_7.shape\n",
 520     "df_Ellis_7.head()"
 521    ]
 522   },
 523   {
 524    "cell_type": "code",
 525    "execution_count": null,
 526    "metadata": {
 527     "colab": {
 528      "base_uri": "https://localhost:8080/",
 529      "height": 1000
 530     },
 531     "id": "Tmzw5MYctb3h",
 532     "outputId": "65c21482-29fa-42e3-eb8f-50414bb9c656"
 533    },
 534    "outputs": [],
 535    "source": [
 536     "# we show here the scatter_matrix\n",
 537     "from pandas.plotting import scatter_matrix\n",
 538     "scatter_matrix(df_Ellis_7, alpha=0.2, figsize=(30,30))"
 539    ]
 540   },
 541   {
 542    "cell_type": "code",
 543    "execution_count": null,
 544    "metadata": {
 545     "colab": {
 546      "base_uri": "https://localhost:8080/",
 547      "height": 1000
 548     },
 549     "id": "kcMuENePteVx",
 550     "outputId": "7ed8fd61-a02c-4a01-f661-69938c0028ad"
 551    },
 552    "outputs": [],
 553    "source": [
 554     "# we show here the scatter_matrix (kde)\n",
 555     "\n",
 556     "scatter_matrix(df_Ellis_7, alpha=0.2, figsize=(30, 30), diagonal='kde')"
 557    ]
 558   },
 559   {
 560    "cell_type": "code",
 561    "execution_count": null,
 562    "metadata": {
 563     "colab": {
 564      "base_uri": "https://localhost:8080/",
 565      "height": 1000
 566     },
 567     "id": "thrxP0LQth88",
 568     "outputId": "d15b9169-79b7-47a0-e19f-b8477dd2b287"
 569    },
 570    "outputs": [],
 571    "source": [
 572     "#scatterplot the most obvious variable related to SalePrice\n",
 573     "sns.pairplot(df_Ellis_7, size = 2.5)\n",
 574     "plt.show();"
 575    ]
 576   },
 577   {
 578    "cell_type": "markdown",
 579    "metadata": {
 580     "id": "HNMDfsExILYz"
 581    },
 582    "source": [
 583     "# **Bono**"
 584    ]
 585   },
 586   {
 587    "cell_type": "code",
 588    "execution_count": null,
 589    "metadata": {
 590     "id": "mxaAX85otljN"
 591    },
 592    "outputs": [],
 593    "source": [
 594     "# this function select column from the global df and create a new df with them\n",
 595     "def select_columns(data_frame, column_names):\n",
 596     "    new_frame = data_frame.loc[:, column_names]\n",
 597     "    return new_frame\n",
 598     "\n",
 599     "selected_columns = ['bono-cpu.idle_perc', 'bono-cpu.stolen_perc', 'bono-cpu.system_perc', 'bono-cpu.wait_perc', 'bono-disk.inode_used_perc', 'bono-disk.space_used_perc', 'bono-io.read_kbytes_sec', 'bono-io.read_req_sec', 'bono-io.read_time_sec', 'bono-io.write_kbytes_sec', 'bono-io.write_req_sec', 'bono-io.write_time_sec', 'bono-load.avg_15_min', 'bono-load.avg_1_min', 'bono-load.avg_5_min', 'bono-mem.free_mb', 'bono-mem.total_mb', 'bono-mem.usable_mb', 'bono-mem.usable_perc', 'bono-net.in_bytes_sec', 'bono-net.in_errors_sec', 'bono-net.in_packets_dropped_sec', 'bono-net.in_packets_sec', 'bono-net.out_bytes_sec', 'bono-net.out_errors_sec', 'bono-net.out_packets_sec']\n",
 600     "df_Bono = select_columns(dframesorted, selected_columns)"
 601    ]
 602   },
 603   {
 604    "cell_type": "code",
 605    "execution_count": null,
 606    "metadata": {
 607     "colab": {
 608      "base_uri": "https://localhost:8080/",
 609      "height": 241
 610     },
 611     "id": "EKi0wnvUIlZb",
 612     "outputId": "4e7d2685-b09c-43ae-9217-d7210a253950"
 613    },
 614    "outputs": [],
 615    "source": [
 616     "df_Bono.head()"
 617    ]
 618   },
 619   {
 620    "cell_type": "code",
 621    "execution_count": null,
 622    "metadata": {
 623     "id": "gGCA3rWuJdQx"
 624    },
 625    "outputs": [],
 626    "source": [
 627     "selected_columns= ['bono-load.avg_1_min', 'bono-cpu.wait_perc', 'bono-net.out_packets_sec', 'bono-cpu.system_perc', 'bono-net.in_bytes_sec', 'bono-mem.free_mb']\n",
 628     "df_Bono_7 = select_columns(df_Bono, selected_columns)"
 629    ]
 630   },
 631   {
 632    "cell_type": "code",
 633    "execution_count": null,
 634    "metadata": {
 635     "colab": {
 636      "base_uri": "https://localhost:8080/",
 637      "height": 204
 638     },
 639     "id": "9ZyhiI6AJo1Y",
 640     "outputId": "9308469b-ba98-4152-dafa-308d3b6f7b04"
 641    },
 642    "outputs": [],
 643    "source": [
 644     "df_Bono_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/bono/df_Bono.csv')\n",
 645     "df_Bono_7.head()"
 646    ]
 647   },
 648   {
 649    "cell_type": "code",
 650    "execution_count": null,
 651    "metadata": {
 652     "colab": {
 653      "base_uri": "https://localhost:8080/"
 654     },
 655     "id": "4ZUXUW35J2fr",
 656     "outputId": "fc474730-e953-4f09-8633-ece4e493d9de"
 657    },
 658    "outputs": [],
 659    "source": [
 660     "df_Bono_7.info()"
 661    ]
 662   },
 663   {
 664    "cell_type": "markdown",
 665    "metadata": {
 666     "id": "QeQ3_oHsJ5BN"
 667    },
 668    "source": [
 669     "# **Sprout**"
 670    ]
 671   },
 672   {
 673    "cell_type": "code",
 674    "execution_count": null,
 675    "metadata": {
 676     "id": "gAQmKdKfJ3st"
 677    },
 678    "outputs": [],
 679    "source": [
 680     "# this function select column from the global df and create a new df with them\n",
 681     "def select_columns(data_frame, column_names):\n",
 682     "    new_frame = data_frame.loc[:, column_names]\n",
 683     "    return new_frame\n",
 684     "\n",
 685     "selected_columns = ['sprout-cpu.idle_perc', 'sprout-cpu.stolen_perc', 'sprout-cpu.system_perc', 'sprout-cpu.wait_perc', 'sprout-disk.inode_used_perc', 'sprout-disk.space_used_perc', 'sprout-io.read_kbytes_sec', 'sprout-io.read_req_sec', 'sprout-io.read_time_sec', 'sprout-io.write_kbytes_sec', 'sprout-io.write_req_sec', 'sprout-io.write_time_sec', 'sprout-load.avg_15_min', 'sprout-load.avg_1_min', 'sprout-load.avg_5_min', 'sprout-mem.free_mb', 'sprout-mem.total_mb', 'sprout-mem.usable_mb', 'sprout-mem.usable_perc', 'sprout-net.in_bytes_sec', 'sprout-net.in_errors_sec', 'sprout-net.in_packets_dropped_sec', 'sprout-net.in_packets_sec', 'sprout-net.out_bytes_sec', 'sprout-net.out_errors_sec', 'sprout-net.out_packets_sec']\n",
 686     "df_Sprout = select_columns(dframesorted, selected_columns)"
 687    ]
 688   },
 689   {
 690    "cell_type": "code",
 691    "execution_count": null,
 692    "metadata": {
 693     "colab": {
 694      "base_uri": "https://localhost:8080/",
 695      "height": 241
 696     },
 697     "id": "wjdR0R7YMOyT",
 698     "outputId": "4600cd66-0456-4da2-8723-17b5079f7f6b"
 699    },
 700    "outputs": [],
 701    "source": [
 702     "df_Sprout.head()"
 703    ]
 704   },
 705   {
 706    "cell_type": "code",
 707    "execution_count": null,
 708    "metadata": {
 709     "id": "6Tfi58LJMUNu"
 710    },
 711    "outputs": [],
 712    "source": [
 713     "selected_columns= ['sprout-load.avg_1_min', 'sprout-cpu.wait_perc', 'sprout-net.out_packets_sec', 'sprout-cpu.system_perc', 'sprout-net.in_bytes_sec', 'sprout-mem.free_mb']\n",
 714     "df_Sprout_7 = select_columns(df_Sprout, selected_columns)"
 715    ]
 716   },
 717   {
 718    "cell_type": "code",
 719    "execution_count": null,
 720    "metadata": {
 721     "colab": {
 722      "base_uri": "https://localhost:8080/",
 723      "height": 204
 724     },
 725     "id": "fP5NM3VjM0Uw",
 726     "outputId": "163390b3-9ce0-406a-d9c9-88687d696c66"
 727    },
 728    "outputs": [],
 729    "source": [
 730     "df_Sprout_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/sprout/df_Sprout.csv')\n",
 731     "df_Sprout_7.head()"
 732    ]
 733   },
 734   {
 735    "cell_type": "code",
 736    "execution_count": null,
 737    "metadata": {
 738     "colab": {
 739      "base_uri": "https://localhost:8080/"
 740     },
 741     "id": "Ybl5WrSYM9Q0",
 742     "outputId": "50f23729-a029-440f-dc6f-828308bb342c"
 743    },
 744    "outputs": [],
 745    "source": [
 746     "df_Sprout_7.info()"
 747    ]
 748   },
 749   {
 750    "cell_type": "markdown",
 751    "metadata": {
 752     "id": "AvKCUZL5NKKz"
 753    },
 754    "source": [
 755     "# **Homestead**"
 756    ]
 757   },
 758   {
 759    "cell_type": "code",
 760    "execution_count": null,
 761    "metadata": {
 762     "id": "EaXOAy7BNF8s"
 763    },
 764    "outputs": [],
 765    "source": [
 766     "# this function select column from the global df and create a new df with them\n",
 767     "def select_columns(data_frame, column_names):\n",
 768     "    new_frame = data_frame.loc[:, column_names]\n",
 769     "    return new_frame\n",
 770     "\n",
 771     "selected_columns = ['homestead-cpu.idle_perc', 'homestead-cpu.stolen_perc', 'homestead-cpu.system_perc', 'homestead-cpu.wait_perc', 'homestead-disk.inode_used_perc', 'homestead-disk.space_used_perc', 'homestead-io.read_kbytes_sec', 'homestead-io.read_req_sec', 'homestead-io.read_time_sec', 'homestead-io.write_kbytes_sec', 'homestead-io.write_req_sec', 'homestead-io.write_time_sec', 'homestead-load.avg_15_min', 'homestead-load.avg_1_min', 'homestead-load.avg_5_min', 'homestead-mem.free_mb', 'homestead-mem.total_mb', 'homestead-mem.usable_mb', 'homestead-mem.usable_perc', 'homestead-net.in_bytes_sec', 'homestead-net.in_errors_sec', 'homestead-net.in_packets_dropped_sec', 'homestead-net.in_packets_sec', 'homestead-net.out_bytes_sec', 'homestead-net.out_errors_sec', 'homestead-net.out_packets_sec']\n",
 772     "df_Homestead = select_columns(dframesorted, selected_columns)"
 773    ]
 774   },
 775   {
 776    "cell_type": "code",
 777    "execution_count": null,
 778    "metadata": {
 779     "colab": {
 780      "base_uri": "https://localhost:8080/",
 781      "height": 241
 782     },
 783     "id": "Qc5vd1CVNas2",
 784     "outputId": "8b6ab0c1-d15d-4545-e69b-dd39facc5915"
 785    },
 786    "outputs": [],
 787    "source": [
 788     "df_Homestead.head()"
 789    ]
 790   },
 791   {
 792    "cell_type": "code",
 793    "execution_count": null,
 794    "metadata": {
 795     "id": "U5e-23VDNgjS"
 796    },
 797    "outputs": [],
 798    "source": [
 799     "selected_columns= ['homestead-load.avg_1_min', 'homestead-cpu.wait_perc', 'homestead-net.out_packets_sec', 'homestead-cpu.system_perc', 'homestead-net.in_bytes_sec', 'homestead-mem.free_mb']\n",
 800     "df_Homestead_7 = select_columns(df_Homestead, selected_columns)"
 801    ]
 802   },
 803   {
 804    "cell_type": "code",
 805    "execution_count": null,
 806    "metadata": {
 807     "colab": {
 808      "base_uri": "https://localhost:8080/",
 809      "height": 221
 810     },
 811     "id": "03Y_KhJVNuOC",
 812     "outputId": "ab3eef43-6a18-4c61-926a-43afac19d7a5"
 813    },
 814    "outputs": [],
 815    "source": [
 816     "df_Homestead_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/homestead/df_Homestead.csv')\n",
 817     "df_Homestead_7.head()"
 818    ]
 819   },
 820   {
 821    "cell_type": "code",
 822    "execution_count": null,
 823    "metadata": {
 824     "colab": {
 825      "base_uri": "https://localhost:8080/"
 826     },
 827     "id": "CVdMWcneN7ix",
 828     "outputId": "f3e00de9-f60d-44d0-ae5b-9c4eda033687"
 829    },
 830    "outputs": [],
 831    "source": [
 832     "df_Homestead_7.info()"
 833    ]
 834   },
 835   {
 836    "cell_type": "markdown",
 837    "metadata": {
 838     "id": "_kw5-s7hOFN3"
 839    },
 840    "source": [
 841     "# **Ralf**"
 842    ]
 843   },
 844   {
 845    "cell_type": "code",
 846    "execution_count": null,
 847    "metadata": {
 848     "id": "v6xYItZWOANR"
 849    },
 850    "outputs": [],
 851    "source": [
 852     "# Select the given columns from a DataFrame and return them as a new DataFrame\n",
 853     "def select_columns(data_frame, column_names):\n",
 854     "    new_frame = data_frame.loc[:, column_names]\n",
 855     "    return new_frame\n",
 856     "\n",
 857     "selected_columns = ['ralf-cpu.idle_perc', 'ralf-cpu.stolen_perc', 'ralf-cpu.system_perc', 'ralf-cpu.wait_perc', 'ralf-disk.inode_used_perc', 'ralf-disk.space_used_perc', 'ralf-io.read_kbytes_sec', 'ralf-io.read_req_sec', 'ralf-io.read_time_sec', 'ralf-io.write_kbytes_sec', 'ralf-io.write_req_sec', 'ralf-io.write_time_sec', 'ralf-load.avg_15_min', 'ralf-load.avg_1_min', 'ralf-load.avg_5_min', 'ralf-mem.free_mb', 'ralf-mem.total_mb', 'ralf-mem.usable_mb', 'ralf-mem.usable_perc', 'ralf-net.in_bytes_sec', 'ralf-net.in_errors_sec', 'ralf-net.in_packets_dropped_sec', 'ralf-net.in_packets_sec', 'ralf-net.out_bytes_sec', 'ralf-net.out_errors_sec', 'ralf-net.out_packets_sec']\n",
 858     "df_Ralf = select_columns(dframesorted, selected_columns)"
 859    ]
 860   },
 861   {
 862    "cell_type": "code",
 863    "execution_count": null,
 864    "metadata": {
 865     "colab": {
 866      "base_uri": "https://localhost:8080/",
 867      "height": 241
 868     },
 869     "id": "XlYi3QBlORiI",
 870     "outputId": "6a4740f0-b2c8-4e48-9067-c17c7a267b0d"
 871    },
 872    "outputs": [],
 873    "source": [
 874     "df_Ralf.head()"
 875    ]
 876   },
 877   {
 878    "cell_type": "code",
 879    "execution_count": null,
 880    "metadata": {
 881     "id": "6l88CRqDOVs-"
 882    },
 883    "outputs": [],
 884    "source": [
 885     "selected_columns= ['ralf-load.avg_1_min', 'ralf-cpu.wait_perc', 'ralf-net.out_packets_sec', 'ralf-cpu.system_perc', 'ralf-net.in_bytes_sec', 'ralf-mem.free_mb']\n",
 886     "df_Ralf_7 = select_columns(df_Ralf, selected_columns)"
 887    ]
 888   },
 889   {
 890    "cell_type": "code",
 891    "execution_count": null,
 892    "metadata": {
 893     "colab": {
 894      "base_uri": "https://localhost:8080/",
 895      "height": 204
 896     },
 897     "id": "u3v26LoiOi5L",
 898     "outputId": "441d54ec-a644-446e-dc5b-e7ad5ec58456"
 899    },
 900    "outputs": [],
 901    "source": [
 902     "df_Ralf_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/ralf/df_Ralf.csv')\n",
 903     "df_Ralf_7.head()"
 904    ]
 905   },
 906   {
 907    "cell_type": "code",
 908    "execution_count": null,
 909    "metadata": {
 910     "colab": {
 911      "base_uri": "https://localhost:8080/"
 912     },
 913     "id": "mR7Fg1w8OtnU",
 914     "outputId": "141c6b09-3c69-45cc-e723-728e1f0e8e68"
 915    },
 916    "outputs": [],
 917    "source": [
 918     "df_Ralf_7.info()"
 919    ]
 920   },
 921   {
 922    "cell_type": "markdown",
 923    "metadata": {
 924     "id": "2-udEbTHO-HS"
 925    },
 926    "source": [
 927     "# **Homer**"
 928    ]
 929   },
 930   {
 931    "cell_type": "code",
 932    "execution_count": null,
 933    "metadata": {
 934     "id": "vVpG5lJiOyYw"
 935    },
 936    "outputs": [],
 937    "source": [
 938     "# Select the given columns from a DataFrame and return them as a new DataFrame\n",
 939     "def select_columns(data_frame, column_names):\n",
 940     "    new_frame = data_frame.loc[:, column_names]\n",
 941     "    return new_frame\n",
 942     "\n",
 943     "selected_columns = ['homer-cpu.idle_perc', 'homer-cpu.stolen_perc', 'homer-cpu.system_perc', 'homer-cpu.wait_perc', 'homer-disk.inode_used_perc', 'homer-disk.space_used_perc', 'homer-io.read_kbytes_sec', 'homer-io.read_req_sec', 'homer-io.read_time_sec', 'homer-io.write_kbytes_sec', 'homer-io.write_req_sec', 'homer-io.write_time_sec', 'homer-load.avg_15_min', 'homer-load.avg_1_min', 'homer-load.avg_5_min', 'homer-mem.free_mb', 'homer-mem.total_mb', 'homer-mem.usable_mb', 'homer-mem.usable_perc', 'homer-net.in_bytes_sec', 'homer-net.in_errors_sec', 'homer-net.in_packets_dropped_sec', 'homer-net.in_packets_sec', 'homer-net.out_bytes_sec', 'homer-net.out_errors_sec', 'homer-net.out_packets_sec']\n",
 944     "df_Homer = select_columns(dframesorted, selected_columns)"
 945    ]
 946   },
 947   {
 948    "cell_type": "code",
 949    "execution_count": null,
 950    "metadata": {
 951     "colab": {
 952      "base_uri": "https://localhost:8080/",
 953      "height": 241
 954     },
 955     "id": "cwMKYQWUPKpl",
 956     "outputId": "ca108468-0c41-4f44-aef0-8c63239c9fd5"
 957    },
 958    "outputs": [],
 959    "source": [
 960     "df_Homer.head()"
 961    ]
 962   },
 963   {
 964    "cell_type": "code",
 965    "execution_count": null,
 966    "metadata": {
 967     "id": "rsUb47imPOyE"
 968    },
 969    "outputs": [],
 970    "source": [
 971     "selected_columns= ['homer-load.avg_1_min', 'homer-cpu.wait_perc', 'homer-net.out_packets_sec', 'homer-cpu.system_perc', 'homer-net.in_bytes_sec', 'homer-mem.free_mb']\n",
 972     "df_Homer_7 = select_columns(df_Homer, selected_columns)"
 973    ]
 974   },
 975   {
 976    "cell_type": "code",
 977    "execution_count": null,
 978    "metadata": {
 979     "colab": {
 980      "base_uri": "https://localhost:8080/",
 981      "height": 204
 982     },
 983     "id": "VZw7WV7tPd9i",
 984     "outputId": "39dc88dd-eb89-45b0-a0d3-f261aab6bbd1"
 985    },
 986    "outputs": [],
 987    "source": [
 988     "df_Homer_7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/homer/df_Homer.csv')\n",
 989     "df_Homer_7.head()"
 990    ]
 991   },
 992   {
 993    "cell_type": "code",
 994    "execution_count": null,
 995    "metadata": {
 996     "colab": {
 997      "base_uri": "https://localhost:8080/"
 998     },
 999     "id": "E1Cqq8V3PnEv",
1000     "outputId": "9a80be17-5bb5-4f8d-a61c-d3206e730309"
1001    },
1002    "outputs": [],
1003    "source": [
1004     "df_Homer_7.info()"
1005    ]
1006   }
1007  ],
1008  "metadata": {
1009   "colab": {
1010    "name": "vIMS_Visualization.ipynb",
1011    "provenance": []
1012   },
1013   "kernelspec": {
1014    "display_name": "Python 3 (ipykernel)",
1015    "language": "python",
1016    "name": "python3"
1017   },
1018   "language_info": {
1019    "codemirror_mode": {
1020     "name": "ipython",
1021     "version": 3
1022    },
1023    "file_extension": ".py",
1024    "mimetype": "text/x-python",
1025    "name": "python",
1026    "nbconvert_exporter": "python",
1027    "pygments_lexer": "ipython3",
1028    "version": "3.9.7"
1029   }
1030  },
1031  "nbformat": 4,
1032  "nbformat_minor": 1
1033 }