-
Notifications
You must be signed in to change notification settings - Fork 0
/
naive_baise.py
80 lines (63 loc) · 2.07 KB
/
naive_baise.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# -*- coding: utf-8 -*-
"""
Created on Sat May 5 01:08:07 2018
@author: wael
"""
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score
from data import import_data
from splitdataset import splitdataset
import operator
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns # data visualization library
from sklearn.metrics import confusion_matrix
# Load the dataset and split it once, at import time. The functions below read
# these module-level names directly: sort() uses X and Y, main() uses
# X_train / X_test / y_train / y_test.
# NOTE(review): X, X_train and X_test are used with .iloc / .drop(axis=1), so
# they are presumably pandas DataFrames -- confirm against splitdataset().
data = import_data()
X, Y, X_train, X_test, y_train, y_test = splitdataset(data)
# Sort the features by their individual impact on the classification.
def sort():
    """Rank every column of ``X`` by how well it predicts ``Y`` on its own.

    Each column is scored with the mean ``cross_val_score`` of a
    ``GaussianNB`` classifier trained on that single column.

    Returns:
        A pair ``(f_list, fc_list)``. ``f_list`` is a list of
        ``(column_index, mean_score)`` tuples sorted by ascending score
        (lowest-scoring feature first). ``fc_list`` is a dict mapping each
        column index to its column name.
    """
    column_names = list(X)
    clf_object = GaussianNB()
    mean_scores = {}
    for i in range(X.shape[1]):
        # ``Series.reshape`` is not available in current pandas releases, so
        # reshape the underlying ndarray into the (n_samples, 1) layout that
        # scikit-learn estimators expect.
        xi = X.iloc[:, i].values.reshape(-1, 1)
        mean_scores[i] = cross_val_score(clf_object, xi, Y).mean()
    fc_list = dict(enumerate(column_names))
    # Ascending by mean score.
    f_list = sorted(mean_scores.items(), key=operator.itemgetter(1))
    return f_list, fc_list
#function to generate lists of features
def lists_generator(v, c, n=32):
    """Build nested lists of feature names, longest list first.

    Args:
        v: Sequence of ranked entries whose first element is a feature key
            (e.g. the ``(column_index, score)`` tuples returned by ``sort``).
        c: Mapping from feature key to feature name.
        n: Maximum number of ranked entries to use. Defaults to 32, the value
            that used to be hard-coded; it is clamped to ``len(v)`` so shorter
            rankings no longer raise ``IndexError``.

    Returns:
        A list ``L`` of lists where ``L[0]`` holds the names of the first
        ``k = min(n, len(v))`` entries of ``v`` (in ranking order), ``L[1]``
        the first ``k - 1`` names, and so on down to ``L[-1]``, which holds
        only the name of ``v[0]``. Empty when ``v`` is empty or ``n <= 0``.
    """
    count = max(0, min(n, len(v)))
    ranked_names = [c[entry[0]] for entry in v[:count]]
    # Each slice is a fresh list, so callers may mutate the results freely.
    return [ranked_names[:size] for size in range(count, 0, -1)]
def main():
    """Evaluate GaussianNB on shrinking sets of dropped features and plot it.

    The features are ranked with ``sort`` and grouped with
    ``lists_generator``. For every group except the first (the longest one),
    the listed columns are dropped from ``X_train`` / ``X_test``, a
    ``GaussianNB`` model is fitted on the remaining columns, and its test
    accuracy (in percent) is recorded. Accuracy is then plotted against the
    number of retained columns, and the best accuracy and its feature count
    are printed.
    """
    v, c = sort()
    Ls = lists_generator(v, c)
    ocuur_list = []
    cm_list = []  # confusion matrices, collected but not used further here
    n_of_feat = []
    model = GaussianNB()
    # Ls[0] is the longest drop list and is skipped, as in the original loop
    # over Ls[i + 1]; iterating the slice removes the hard-coded count of 31.
    for dropped in Ls[1:]:
        train_subset = X_train.drop(dropped, axis=1)
        test_subset = X_test.drop(dropped, axis=1)
        model.fit(train_subset, y_train)
        y_pred = model.predict(test_subset)
        ocuur_list.append(accuracy_score(y_test, y_pred) * 100)
        cm_list.append(confusion_matrix(y_test, y_pred))
        # Record the real number of retained columns instead of assuming the
        # sequence 1..31.
        n_of_feat.append(train_subset.shape[1])

    if not ocuur_list:
        # Fewer than two ranked features: nothing to compare or plot.
        print("no feature subsets to evaluate")
        return

    plt.scatter(n_of_feat, ocuur_list, marker="o", color="blue")
    plt.title("performance ")
    plt.xlabel("number of features ")
    plt.ylabel("occuracy")
    plt.plot(n_of_feat, ocuur_list, color="green", ls="-")
    plt.grid()

    best = max(ocuur_list)
    print("best performance : ", best)
    print("number of features : ", n_of_feat[ocuur_list.index(best)])
    # Without an explicit show() the figure is never displayed when the file
    # is run as a plain script.
    plt.show()
# Run the experiment. There is no ``if __name__ == "__main__"`` guard, so this
# call also executes when the module is imported.
main()