models/failure_prediction/python/featurecreation.py

   1 # pylint: disable=C0103, C0116, W0621, E0401, W0104, W0105, R0913, E1136, W0612, E0102, C0301, W0611, C0411, W0311, C0326, C0330, W0106, C0412
   2 # -*- coding: utf-8 -*-
   3 """FeatureCreation.ipynb
   4
   5 Automatically generated by Colaboratory.
   6
   7 Original file is located at
   8     https://colab.research.google.com/drive/1UQzgn71tYU7WHgr-CL1CRNM9q9Ajr2Kx
   9
  10 Contributors: **Rohit Singh Rathaur, Girish L.**
  11
  12 Copyright [2021](2021) [*Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka*]
  13
  14 Licensed under the Apache License, Version 2.0 (the "License");
  15 you may not use this file except in compliance with the License.
  16 You may obtain a copy of the License at
  17
  18     http://www.apache.org/licenses/LICENSE-2.0
  19
  20 Unless required by applicable law or agreed to in writing, software
  21 distributed under the License is distributed on an "AS IS" BASIS,
  22 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  23 See the License for the specific language governing permissions and
  24 limitations under the License.
  25 """
  26
  27 # Commented out IPython magic to ensure Python compatibility.
# Import libraries used for visualization and analysis
  29 import pandas as pd
  30 import numpy as np
  31
  32 # %matplotlib inline
  33 import matplotlib
  34 import matplotlib.pyplot as plt
  35
  36 from pandas import Series, DataFrame
  37 import seaborn as sns
  38 from sklearn.preprocessing import scale
  39 from sklearn.decomposition import PCA
  40 from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
  41 from scipy import stats
  42 from IPython.display import display, HTML
  43
  44 from google.colab import drive
  45 drive.mount('/gdrive')
  46
  47 """# **Loading the Data**"""
  48
  49 df_Ellis = pd.read_csv(
  50     "/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Final.csv")
  51 #df_Bono  = pd.read_csv("/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Bono.csv", error_bad_lines=False)
  52 #df_Sprout  = pd.read_csv("/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Sprout.csv", error_bad_lines=False)
  53 #df_Homer  = pd.read_csv("/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Homer.csv", error_bad_lines=False)
  54 #df_Homestead  = pd.read_csv("/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Homestead.csv", error_bad_lines=False)
  55 #df_Ralf  = pd.read_csv("/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Ralf.csv", error_bad_lines=False)
  56
  57 df_Ellis.head()
  58
  59 df_Ellis.describe()
  60
  61 #df_Ellis['SLO1'] = 0
  62 #print('Column names are: ',list(df_Ellis.columns))
  63
  64 df4 = df_Ellis["ellis-load.avg_1_min"] > 2.45
  65 df4
  66 df4.to_csv(
  67     '/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/EllisLoadAvgLabel_lessthan0198.csv')
  68 df4.head(50)
  69
  70 df3 = df_Ellis["ellis-cpu.wait_perc"] > 5
  71 df3
  72 df3.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/ellis-cpu>5.csv')
  73 df3.head(50)
  74
  75 df5 = df_Ellis["ellis-net.out_packets_sec"] > 1000
  76 df5
  77 df5.to_csv(
  78     '/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/ellis-net.in_bytes_sec21139.csv')
  79 df5.head(50)
  80
  81 # We are applying Logical OR Operator between df4 and df3
  82 df6 = (df4[0:176999]) | (df3[0:176999])
  83 df6.head(50)
  84
  85 df6.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/OR_TwoCondition(2).csv')
  86 df6.head(50)
  87
  88 df7 = (df6[0:176999]) | (df5[0:176999])
  89 df7.head(50)
  90
  91 df7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/FinalORLabel8.5.csv')
  92 df7.head(50)
  93
  94 df_Ellis.insert(7, "Label", df7)
  95
  96 #df_Ellis.insert (8, "Label", df7)
  97
  98 # We applied Logical OR operator in two features only known as  and df3
  99 # and df4 and stored result in df6 which is known as Final Label after
 100 # applying OR condition
 101 df_Ellis
 102 df_Ellis.to_csv(
 103     '/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Ellis_FinalTwoConditionwithOR.csv')
 104
 105 df_Ellis.head(100)
 106
 107 # pandas count distinct values in column
 108 df_Ellis['Label'].value_counts()
 109
 110 #final.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/New/FinalLabel.csv')
 111
 112 #df_Ellis.loc[(df_Ellis["ellis-cpu.wait_perc"] > 5) & (df_Ellis["ellis-load.avg_1_min"] > 2)]
 113
 114 """# **Creating New Features**"""