1 # pylint: disable=C0103, C0116, W0621, E0401, W0104, W0105, R0913, E1136, W0612, E0102, C0301, W0611, C0411, W0311, C0326, C0330, W0106, C0412
2 # -*- coding: utf-8 -*-
3 """FeatureCreation.ipynb
5 Automatically generated by Colaboratory.
7 Original file is located at
8 https://colab.research.google.com/drive/1UQzgn71tYU7WHgr-CL1CRNM9q9Ajr2Kx
10 Contributors: **Rohit Singh Rathaur, Girish L.**
12 Copyright [2021](2021) [*Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka*]
14 Licensed under the Apache License, Version 2.0 (the "License");
15 you may not use this file except in compliance with the License.
16 You may obtain a copy of the License at
18 http://www.apache.org/licenses/LICENSE-2.0
20 Unless required by applicable law or agreed to in writing, software
21 distributed under the License is distributed on an "AS IS" BASIS,
22 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23 See the License for the specific language governing permissions and
limitations under the License.
"""
27 # Commented out IPython magic to ensure Python compatibility.
# Import libraries used for visualization and analysis
34 import matplotlib.pyplot as plt
36 from pandas import Series, DataFrame
38 from sklearn.preprocessing import scale
39 from sklearn.decomposition import PCA
40 from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
41 from scipy import stats
42 from IPython.display import display, HTML
44 from google.colab import drive
# pd.read_csv is called below, but no import in this file binds the name
# `pd`; import it here so the load does not fail with a NameError.
import pandas as pd

# Mount Google Drive at /gdrive; every path used below lives under it.
drive.mount('/gdrive')

"""# **Loading the Data**"""

# Source table from which all threshold labels below are computed.
df_Ellis = pd.read_csv(
    "/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Final.csv")

# Disabled loaders for the other df_* CSV files, left exactly as they were.
# NOTE(review): `error_bad_lines` was removed in pandas 2.0; if any of these
# are re-enabled, `on_bad_lines="skip"` is the replacement -- confirm the
# pandas version in use first.
#df_Bono = pd.read_csv("/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Bono.csv", error_bad_lines=False)
#df_Sprout = pd.read_csv("/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Sprout.csv", error_bad_lines=False)
#df_Homer = pd.read_csv("/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Homer.csv", error_bad_lines=False)
#df_Homestead = pd.read_csv("/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Homestead.csv", error_bad_lines=False)
#df_Ralf = pd.read_csv("/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Ralf.csv", error_bad_lines=False)

#print('Column names are: ',list(df_Ellis.columns))
# Each comparison yields a boolean Series (one value per row of df_Ellis)
# that is True where the metric exceeds its threshold.

# Flag rows whose 1-minute load average is above 2.45.
df4 = df_Ellis["ellis-load.avg_1_min"] > 2.45
# The call head for this path was missing, leaving a bare string followed by
# an unmatched ")" (a syntax error). Restored as df4.to_csv(...), mirroring
# the df3 export below.
# NOTE(review): the file name says "lessthan0198" while the condition is
# "> 2.45"; the path is kept unchanged in case other tooling reads it.
df4.to_csv(
    '/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/EllisLoadAvgLabel_lessthan0198.csv')

# Flag rows whose CPU wait percentage is above 5.
df3 = df_Ellis["ellis-cpu.wait_perc"] > 5
df3.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/ellis-cpu>5.csv')

# Flag rows whose outgoing packet rate is above 1000 per second.
df5 = df_Ellis["ellis-net.out_packets_sec"] > 1000
# Call head restored here as well (same dangling-string defect as above).
# NOTE(review): the file name mentions "in_bytes_sec21139" although the
# column tested is "ellis-net.out_packets_sec"; path kept unchanged.
df5.to_csv(
    '/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/ellis-net.in_bytes_sec21139.csv')
# Element-wise logical OR of the load-average flag (df4) and the CPU-wait
# flag (df3), both restricted to the slice [0:176999].
df6 = df4[:176999] | df3[:176999]
df6.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/OR_TwoCondition(2).csv')

# Fold in the network flag (df5) over the same slice. df6 is already limited
# to that slice, so it is used directly.
df7 = df6 | df5[:176999]
df7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/FinalORLabel8.5.csv')
# Add the combined OR flag (df7) to the table as a new "Label" column at
# column position 7.
df_Ellis.insert(7, "Label", df7)

#df_Ellis.insert (8, "Label", df7)

# df6 is the logical OR of the two flags df3 and df4; df7 additionally ORs in
# df5 and is what the "Label" column above holds.
# The call head for the path below was missing, leaving a bare string
# followed by an unmatched ")" (a syntax error).
# NOTE(review): restored as df_Ellis.to_csv(...) because the file name starts
# with "Ellis_" and the statement follows the Label insertion -- confirm that
# the labelled table is the object meant to be written here.
df_Ellis.to_csv(
    '/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Ellis_FinalTwoConditionwithOR.csv')

# Count how many rows carry each distinct Label value. The result is only
# shown when this runs as the last expression of a notebook cell; as a plain
# script statement it is discarded.
df_Ellis['Label'].value_counts()

#final.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/New/FinalLabel.csv')

#df_Ellis.loc[(df_Ellis["ellis-cpu.wait_perc"] > 5) & (df_Ellis["ellis-load.avg_1_min"] > 2)]

"""# **Creating New Features**"""