Python_Code: Added python code after running pylint

[thoth.git] / models / failure_prediction / python / featurecreation.py
diff --git a/models/failure_prediction/python/featurecreation.py b/models/failure_prediction/python/featurecreation.py

new file mode 100644 (file)

index 0000000..7ed5cf3
--- /dev/null
+++ b/models/failure_prediction/python/featurecreation.py
@@ -0,0 +1,114 @@
+# pylint: disable=C0103, C0116, W0621, E0401, W0104, W0105, R0913, E1136, W0612, E0102, C0301, W0611, C0411, W0311, C0326, C0330, W0106, C0412
+# -*- coding: utf-8 -*-
+"""FeatureCreation.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/1UQzgn71tYU7WHgr-CL1CRNM9q9Ajr2Kx
+
+Contributors: **Rohit Singh Rathaur, Girish L.**
+
+Copyright [2021](2021) [*Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka*]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+# Commented out IPython magic to ensure Python compatibility.
+# Import libraries used for visualization and analysis
+import pandas as pd
+import numpy as np
+
+# %matplotlib inline
+import matplotlib
+import matplotlib.pyplot as plt
+
+from pandas import Series, DataFrame
+import seaborn as sns
+from sklearn.preprocessing import scale
+from sklearn.decomposition import PCA
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+from scipy import stats
+from IPython.display import display, HTML
+
+from google.colab import drive
+drive.mount('/gdrive')  # mount Google Drive at /gdrive; every CSV path below is under this mount
+
+"""# **Loading the Data**"""
+
+df_Ellis = pd.read_csv(  # load Final.csv into a pandas DataFrame
+    "/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Final.csv")
+#df_Bono  = pd.read_csv("/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Bono.csv", error_bad_lines=False)
+#df_Sprout  = pd.read_csv("/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Sprout.csv", error_bad_lines=False)
+#df_Homer  = pd.read_csv("/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Homer.csv", error_bad_lines=False)
+#df_Homestead  = pd.read_csv("/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Homestead.csv", error_bad_lines=False)
+#df_Ralf  = pd.read_csv("/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Ralf.csv", error_bad_lines=False)
+
+df_Ellis.head()  # head()/describe() results are discarded when run as a script (notebook leftovers)
+
+df_Ellis.describe()
+
+#df_Ellis['SLO1'] = 0
+#print('Column names are: ',list(df_Ellis.columns))
+
+df4 = df_Ellis["ellis-load.avg_1_min"] > 2.45  # boolean Series: True where the column exceeds 2.45
+df4  # bare expression: notebook display leftover, no effect in a script
+df4.to_csv(  # NOTE(review): file name says "lessthan0198" but the test above is "> 2.45" -- confirm
+    '/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/EllisLoadAvgLabel_lessthan0198.csv')
+df4.head(50)
+
+df3 = df_Ellis["ellis-cpu.wait_perc"] > 5  # boolean Series: True where the column exceeds 5
+df3
+df3.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/ellis-cpu>5.csv')
+df3.head(50)
+
+df5 = df_Ellis["ellis-net.out_packets_sec"] > 1000  # boolean Series: True where the column exceeds 1000
+df5
+df5.to_csv(  # NOTE(review): file name mentions "in_bytes_sec21139" but the data is out_packets_sec > 1000 -- confirm
+    '/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/ellis-net.in_bytes_sec21139.csv')
+df5.head(50)
+
+# Element-wise logical OR of the load-average flag (df4) and the CPU-wait flag (df3).
+# The former hard-coded [0:176999] slices are dropped so that every row of the
+# input is labelled, whatever its length (identical output for <= 176999 rows).
+df6 = df4 | df3
+df6.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/OR_TwoCondition(2).csv')
+df6.head(50)
+
+# OR in the network flag (df5): df7 is True when any of the three conditions holds.
+# df6 and df5 both derive from df_Ellis and share its index, so no re-slicing is needed.
+df7 = df6 | df5
+df7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/FinalORLabel8.5.csv')
+df7.head(50)
+
+df_Ellis.insert(7, "Label", df7)  # add df7 as a new "Label" column at position 7 (in place)
+
+#df_Ellis.insert (8, "Label", df7)
+
+# "Label" holds df7, i.e. the logical OR of the three threshold flags
+# df4, df3 and df5 (df6 is only the intermediate df4 | df3 result).
+# NOTE(review): the output file name still says "TwoCondition" -- confirm.
+df_Ellis
+df_Ellis.to_csv(
+    '/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Ellis_FinalTwoConditionwithOR.csv')
+
+df_Ellis.head(100)
+
+# Count the occurrences of each distinct value in the "Label" column.
+df_Ellis['Label'].value_counts()
+
+#final.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/New/FinalLabel.csv')
+
+#df_Ellis.loc[(df_Ellis["ellis-cpu.wait_perc"] > 5) & (df_Ellis["ellis-load.avg_1_min"] > 2)]
+
+"""# **Creating New Features**"""