In [2]:
import pandas as pd
import numpy as  np
from sklearn.datasets import load_breast_cancer

In [3]:
dataset = load_breast_cancer()
X = dataset.data
Y = dataset.target

def normalize(X):
    X = (X - np.min(X)) / (np.max(X) - np.min(X))
    return X

for i in range(X.shape[1]):
    X[:,i] = normalize(X[:,i])

from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25,  random_state=3)

from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(X_train, Y_train)


from sklearn import metrics
Y_pred = gnb.predict(X_test)
print("Naive Bayes Accuracy is: ", metrics.accuracy_score(Y_test , Y_pred) * 100)

Naive Bayes Accuracy is:  93.7062937062937


In [4]:
from sklearn.neighbors import KNeighborsClassifier
knc = KNeighborsClassifier(5)
knc.fit(X_train, Y_train)
Y_pred = knc.predict(X_test)
knn_accuracy = (metrics.accuracy_score(Y_test, Y_pred) * 100)
print("KNN Accuracy is: ", metrics.accuracy_score(Y_test , Y_pred) * 100)

KNN Accuracy is:  95.1048951048951


In [5]:
from sklearn import tree
dtc = tree.DecisionTreeClassifier()
dtc.fit(X_train,Y_train)
Y_pred = dtc.predict(X_test)
print("Decision tree Accuracy is: ", metrics.accuracy_score(Y_test , Y_pred) * 100)

Decision tree Accuracy is:  93.00699300699301


In [6]:
def ClassifierCrossValid(X,Y):
    from sklearn.model_selection import cross_validate,cross_val_score
    gnb = GaussianNB()
    knn = KNeighborsClassifier(n_neighbors=3)
    dtc = tree.DecisionTreeClassifier()
    bayesScore = cross_val_score(gnb, X, Y, cv=4)
    knnScore = cross_val_score(knn, X, Y, cv=4)
    d_treeScore = cross_val_score(dtc, X, Y, cv=4)
    return bayesScore,knnScore,d_treeScore
    
Naive_Bayes_Score, KNN_Score, Tree_Score = ClassifierCrossValid(X,Y)
print('Naive Bayes Score :',(Naive_Bayes_Score*100).mean())
print('KNN Score :',(KNN_Score*100).mean())
print('Tree Score :',(Tree_Score*100).mean())

Naive Bayes Score : 92.97498276371516
KNN Score : 96.83714173150793
Tree Score : 91.21565054663647


In [7]:
def Random_SubSampling(X,Y):
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    accuracy = np.empty((10,3))
    for i in range(10):
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=3)
        gnb = GaussianNB()
        knn = KNeighborsClassifier(n_neighbors=5)
        dtc = tree.DecisionTreeClassifier()
        
        gnb.fit(X_train, Y_train)
        knc.fit(X_train, Y_train)
        dtc.fit(X_train,Y_train)
        
        Y_pred = gnb.predict(X_test)
        accuracy[i][0] = metrics.accuracy_score(Y_test , Y_pred) * 100
        
        Y_pred = knc.predict(X_test)
        accuracy[i][1] = metrics.accuracy_score(Y_test , Y_pred) * 100
        
        Y_pred = dtc.predict(X_test)
        accuracy[i][2] = metrics.accuracy_score(Y_test , Y_pred) * 100
    naive_accuracy = np.mean(accuracy[:,0])
    knn_accuracy = np.mean(accuracy[:,1])
    d_tree_accuracy = np.mean(accuracy[:,2])
    return naive_accuracy,knn_accuracy,d_tree_accuracy


Naive_Bayes_Score, KNN_Score, Tree_Score =  Random_SubSampling(X,Y) 
print('Naive Bayes Accuracy :',Naive_Bayes_Score)
print('KNN Accuracy:',KNN_Score)
print('Tree Accuracy :',Tree_Score)

Naive Bayes Accuracy : 93.70629370629372
KNN Accuracy: 95.10489510489512
Tree Accuracy : 93.28671328671331
