Python_Code: Added python code after running pylint
[thoth.git] / models / failure_prediction / python / featurecreation.py
1 # pylint: disable=C0103, C0116, W0621, E0401, W0104, W0105, R0913, E1136, W0612, E0102, C0301, W0611, C0411, W0311, C0326, C0330, W0106, C0412
2 # -*- coding: utf-8 -*-
3 """FeatureCreation.ipynb
4
5 Automatically generated by Colaboratory.
6
7 Original file is located at
8     https://colab.research.google.com/drive/1UQzgn71tYU7WHgr-CL1CRNM9q9Ajr2Kx
9
10 Contributors: **Rohit Singh Rathaur, Girish L.**
11
12 Copyright [2021](2021) [*Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka*]
13
14 Licensed under the Apache License, Version 2.0 (the "License");
15 you may not use this file except in compliance with the License.
16 You may obtain a copy of the License at
17
18     http://www.apache.org/licenses/LICENSE-2.0
19
20 Unless required by applicable law or agreed to in writing, software
21 distributed under the License is distributed on an "AS IS" BASIS,
22 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23 See the License for the specific language governing permissions and
24 limitations under the License.
25 """
26
27 # Commented out IPython magic to ensure Python compatibility.
28 # Import libraries used for visualization and analysis
29 import pandas as pd
30 import numpy as np
31
32 # %matplotlib inline
33 import matplotlib
34 import matplotlib.pyplot as plt
35
36 from pandas import Series, DataFrame
37 import seaborn as sns
38 from sklearn.preprocessing import scale
39 from sklearn.decomposition import PCA
40 from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
41 from scipy import stats
42 from IPython.display import display, HTML
43
44 from google.colab import drive
# Make the Google Drive contents reachable under /gdrive (Colab runtime).
drive.mount('/gdrive')

"""# **Loading the Data**"""

# Only the "Final" dataset is read here, into df_Ellis.  The per-component
# matrices (df_Bono, df_Sprout, df_Homer, df_Homestead, df_Ralf under
# data/matrices/) are intentionally not loaded in this script.
_final_csv_path = "/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Final.csv"
df_Ellis = pd.read_csv(_final_csv_path)

# Notebook-style previews: the first rows, then per-column summary statistics.
df_Ellis.head()

df_Ellis.describe()
60
61 #df_Ellis['SLO1'] = 0
62 #print('Column names are: ',list(df_Ellis.columns))
63
# Build one boolean mask per threshold condition.  Every mask is derived from
# df_Ellis, so they all share its index and can be combined row by row
# without slicing to a hard-coded row count (a fixed slice would leave any
# rows beyond it without a label once the result is inserted below).

# True where the ellis-load.avg_1_min column exceeds 2.45.
# NOTE(review): the output file name mentions "lessthan0198", which does not
# match this condition; the path is kept unchanged for downstream readers.
df4 = df_Ellis["ellis-load.avg_1_min"] > 2.45
df4.to_csv(
    '/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/EllisLoadAvgLabel_lessthan0198.csv')

# True where the ellis-cpu.wait_perc column exceeds 5.
df3 = df_Ellis["ellis-cpu.wait_perc"] > 5
df3.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/ellis-cpu>5.csv')

# True where the ellis-net.out_packets_sec column exceeds 1000.
# NOTE(review): the output file name mentions "in_bytes_sec21139", which does
# not match this condition; the path is kept unchanged for downstream readers.
df5 = df_Ellis["ellis-net.out_packets_sec"] > 1000
df5.to_csv(
    '/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/ellis-net.in_bytes_sec21139.csv')

# Intermediate label: logical OR of the load-average and CPU-wait conditions.
df6 = df4 | df3
df6.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/OR_TwoCondition(2).csv')

# Final label: the intermediate label OR-ed with the network condition, i.e.
# True when at least one of the three thresholds is exceeded.
df7 = df6 | df5
df7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/FinalORLabel8.5.csv')

# Add the final label as column position 7.  DataFrame.insert raises
# ValueError if a "Label" column already exists (e.g. when re-running this
# section against an already-labelled frame).
df_Ellis.insert(7, "Label", df7)
95
96 #df_Ellis.insert (8, "Label", df7)
97
# At this point df_Ellis carries the "Label" column inserted above.  That
# column is df7, the logical OR of the three threshold masks (df4, df3 and
# df5); df6 is only the intermediate OR of df4 and df3.
# NOTE(review): the output file name still says "TwoCondition" although the
# stored label combines three conditions.
_labelled_csv_path = '/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Ellis_FinalTwoConditionwithOR.csv'
df_Ellis
df_Ellis.to_csv(_labelled_csv_path)

df_Ellis.head(100)

# Count how many rows fall under each distinct Label value.
_label_column = df_Ellis['Label']
_label_column.value_counts()
109
110 #final.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/New/FinalLabel.csv')
111
112 #df_Ellis.loc[(df_Ellis["ellis-cpu.wait_perc"] > 5) & (df_Ellis["ellis-load.avg_1_min"] > 2)]
113
114 """# **Creating New Features**"""