In [1]:
#imported all necessary modules and dependencies 
from sklearn.tree import DecisionTreeClassifier as dtc
from sklearn.neighbors import KNeighborsClassifier as kn
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier as gpc
from sklearn.gaussian_process.kernels import RBF
from sklearn.ensemble import RandomForestClassifier as rf, AdaBoostClassifier as ab
from sklearn.naive_bayes import GaussianNB as gnb 
from sklearn.neural_network import MLPClassifier as mlp
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as qda
from sklearn.metrics import accuracy_score as acc

import numpy as np
import pandas as pd
 

In [2]:
 
# Created 10 different classifiers based on the scikit-learn documentation.
#
# NOTE: every instance below is bound to the same name as the class alias it
# was built from (e.g. `dtc = dtc(...)`), so after this cell runs the aliases
# refer to instances. Re-run the import cell before re-running this one.

# Fixed seed for the estimators that draw random numbers while fitting, so a
# fresh "Restart & Run All" reproduces the same accuracy figures.
RANDOM_STATE = 0

dtc = dtc(random_state=RANDOM_STATE)
kn = kn(3)  # 3 nearest neighbours
svc = SVC(kernel='linear', C=0.025)
svc_gam = SVC(gamma=2,C=1)
gpc = gpc(1.0 * RBF(1.0))
rf = rf(max_depth=5, n_estimators=10, max_features=1, random_state=RANDOM_STATE)
mlp = mlp(alpha=1, random_state=RANDOM_STATE)
ab = ab(random_state=RANDOM_STATE)
gnb = gnb()
qda = qda()

# Training samples, one row per person: [height, weight, shoe_size]
X = [[181, 80, 44], [177, 70, 43], [160, 60, 38], [154, 54, 37], [166, 65, 40],
     [190, 90, 47], [175, 64, 39],
     [177, 70, 40], [159, 55, 37], [171, 75, 42], [181, 85, 43]]

# Target label for each row of X (same order, same length).
Y = ['male', 'male', 'female', 'female', 'male', 'male', 'female', 'female',
     'female', 'male', 'male']

In [3]:
"""Imported the time module to record the total running time of training, 
predicting, and measuring the predictive accuracy of each classifier"""
import time


def _fit_and_score(clf, features, labels):
    """Fit ``clf``, predict on the same samples, and time the whole round trip.

    Parameters
    ----------
    clf : object
        Estimator exposing ``fit(features, labels)`` whose return value
        exposes ``predict(features)``.
    features : list
        Samples used both for fitting and for prediction.
    labels : list
        Target labels, used for fitting and as ground truth for the score.

    Returns
    -------
    tuple
        ``(fitted, predictions, accuracy_pct, elapsed_ms)`` where
        ``accuracy_pct`` is the accuracy score multiplied by 100 and
        ``elapsed_ms`` is the elapsed time of fit + predict + scoring in
        milliseconds.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # high-resolution clock intended for measuring short durations.
    started = time.perf_counter()
    fitted = clf.fit(features, labels)
    predictions = fitted.predict(features)
    accuracy_pct = acc(labels, predictions) * 100
    elapsed_ms = (time.perf_counter() - started) * 1000
    return fitted, predictions, accuracy_pct, elapsed_ms


# Every classifier is trained and scored on the same samples (X, Y), so the
# figures below are training accuracy, not an estimate for unseen data.
clf0, pred_tree, acc_tree, tree_time = _fit_and_score(dtc, X, Y)
clf1, pred_kn, acc_kn, kn_time = _fit_and_score(kn, X, Y)
clf2, pred_svc, acc_svc, svc_time = _fit_and_score(svc, X, Y)
clf3, pred_svc_gam, acc_svc_gam, svc_gam_time = _fit_and_score(svc_gam, X, Y)
clf4, pred_gpc, acc_gpc, gpc_time = _fit_and_score(gpc, X, Y)
clf5, pred_rf, acc_rf, rf_time = _fit_and_score(rf, X, Y)
clf6, pred_mlp, acc_mlp, mlp_time = _fit_and_score(mlp, X, Y)
clf7, pred_ab, acc_ab, ab_time = _fit_and_score(ab, X, Y)
clf8, pred_gnb, acc_gnb, gnb_time = _fit_and_score(gnb, X, Y)
clf9, pred_qda, acc_qda, qda_time = _fit_and_score(qda, X, Y)

In [4]:
# Results keyed by classifier display name; each value is the pair
# [accuracy in percent, run time in milliseconds] measured in the previous cell.
clf_acc_dict = {
    'Tree': [acc_tree, tree_time],
    'KNeighbors': [acc_kn, kn_time],
    'SVC': [acc_svc, svc_time],
    'SVC_gamma': [acc_svc_gam, svc_gam_time],
    'Gaussian_Process': [acc_gpc, gpc_time],
    'RandomForest': [acc_rf, rf_time],
    'Perceptron': [acc_mlp, mlp_time],  # the MLPClassifier results
    'AdaBoost': [acc_ab, ab_time],
    'GaussianNB': [acc_gnb, gnb_time],
    'QuadraticDiscriminantAnalysis': [acc_qda, qda_time],
}

In [5]:
"""here is experimentation using Pandas Series function and using for loop with print functions. 
The latter gives a better display."""


# View 1: wrap the results dict in a pandas Series; reset_index() turns the
# classifier names into a regular column so the printed rows line up.
score_table = pd.Series(clf_acc_dict).reset_index()
print(score_table)

print('')

# View 2: fixed-width table with headers, one row per classifier.
row_format = "{:<32} {:<20} {:<10}"
print(row_format.format('CLASSIFIER','ACCURACY','RUN TIME'))
for k, v in clf_acc_dict.items():
    # Unpack into `run_time`, not `time`: the latter would rebind the imported
    # `time` module to a float and break any later timing call.
    accuracy, run_time = v
    print(row_format.format(k, accuracy, run_time))

print('')

# Conclusion recorded from the run whose output is saved with this notebook;
# timings vary between runs, so re-check it against the table above.
print('The classifier with max accuracy and minimum time is Support Vector Machine Classifier with the defined gamma argument')



 

 

                           index                                    0
0                       AdaBoost           [100.0, 50.04651802044678]
1                     GaussianNB  [81.8181818182, 0.7706435043172699]
2               Gaussian_Process          [100.0, 26.314020803221222]
3                     KNeighbors  [81.8181818182, 1.3117175570789705]
4                     Perceptron   [54.5454545455, 7.052841752641939]
5  QuadraticDiscriminantAnalysis          [100.0, 0.9016642274912445]
6                   RandomForest           [100.0, 11.46525723106014]
7                            SVC  [90.9090909091, 0.8238824725234064]
8                      SVC_gamma          [100.0, 0.5761891168976707]
9                           Tree          [100.0, 1.1886260419356933]

CLASSIFIER                       ACCURACY             RUN TIME  
Tree                             100.0                1.1886260419356933
KNeighbors                       81.81818181818183    1.3117175570789705
SVC               