In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, log_loss
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# Load the train/test arrays saved as .npy files under data/.
# Presumably *_x holds features and *_y holds class labels - confirm against
# whatever produced these files.
X_train, y_train = np.load('data/train_x.npy'), np.load('data/train_y.npy')
X_test, y_test = np.load('data/test_x.npy'), np.load('data/test_y.npy')

# Candidate models to compare. Each is fitted and scored in turn; all use
# library defaults except k-NN, which is given 3 neighbours.
classifiers = [
    KNeighborsClassifier(n_neighbors=3),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    GaussianNB(),
    LinearDiscriminantAnalysis(),
    QuadraticDiscriminantAnalysis(),
]

# Logging for Visual Comparison
# Fit every classifier on the training arrays, score it on the test arrays,
# print the metrics, and collect one row per classifier into `log`.
log_cols = ["Classifier", "Accuracy", "Log Loss"]

# Rows are accumulated in a plain list and turned into a DataFrame once after
# the loop: DataFrame.append was removed in pandas 2.0, and appending inside
# the loop copied the whole frame on every iteration.
log_rows = []

for clf in classifiers:
    clf.fit(X_train, y_train)
    name = clf.__class__.__name__

    print("="*30)
    print(name)

    print('****Results****')
    # NOTE: despite its name, `train_predictions` holds predictions made on
    # the *test* set; the name is kept so any later code that reads it still
    # works.
    train_predictions = clf.predict(X_test)
    acc = accuracy_score(y_test, train_predictions)
    print("Accuracy: {:.4%}".format(acc))

    train_predictions = clf.predict_proba(X_test)
    # Pass the fitted class order explicitly so the probability columns are
    # matched to the right labels even if y_test is missing a class that was
    # present during training (log_loss would otherwise infer the label set
    # from y_test alone and reject the mismatched column count).
    ll = log_loss(y_test, train_predictions, labels=clf.classes_)
    print("Log Loss: {}".format(ll))

    # Accuracy is stored as a percentage, log loss as returned.
    log_rows.append([name, acc*100, ll])

print("="*30)

# One row per classifier, indexed 0..n-1 (the old append-based version left
# every row with index 0).
log = pd.DataFrame(log_rows, columns=log_cols)

KNeighborsClassifier
****Results****
Accuracy: 30.7000%
Log Loss: 16.319707937118526
DecisionTreeClassifier
****Results****
Accuracy: 32.1300%
Log Loss: 4.584463549025097




RandomForestClassifier
****Results****
Accuracy: 33.2800%
Log Loss: 2.8311889831451396
AdaBoostClassifier
****Results****
Accuracy: 31.5200%
Log Loss: 2.2750790198432127
GradientBoostingClassifier
****Results****
Accuracy: 33.5900%
Log Loss: 1.7740469356464432
GaussianNB
****Results****
Accuracy: 29.9400%
Log Loss: 17.073932071601973
LinearDiscriminantAnalysis
****Results****
Accuracy: 32.7500%
Log Loss: 1.9396881625299354
QuadraticDiscriminantAnalysis
****Results****
Accuracy: 28.8800%
Log Loss: 20.614786481808387
