In [149]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression, RidgeClassifier, SGDClassifier
from sklearn.kernel_approximation import Nystroem
from sklearn.ensemble.bagging import BaggingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
# from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, recall_score, precision_score, auc, roc_auc_score

%matplotlib inline

In [12]:
df_val = pd.read_csv('./Dig-MNIST.csv')
df_val.shape

(10240, 785)

In [13]:
df_train = pd.read_csv('./train.csv')
df_train.shape

(60000, 785)

In [14]:
df_test = pd.read_csv('./test.csv')
df_test.shape

(5000, 785)

In [9]:
df_train.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
df_test

Unnamed: 0,id,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,4995,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4996,4996,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4997,4997,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4998,4998,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [21]:
df_val.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [17]:
X_cols = ['pixel'+str(i) for i in range(784)]
y = 'label'

In [18]:
X_train, X_test, X_val = [df[X_cols] for df in (df_train, df_test, df_val)]

In [24]:
y_train, y_val = [df[y] for df in (df_train, df_val)]

# Models

### Without Neural Networks 

##### 1) Simple SVC

In [60]:
pipe_chain = [('sc',StandardScaler()), ('kernel', Nystroem()), ('linear_svm', LinearSVC())]

In [61]:
pipe_chain

[('sc', StandardScaler(copy=True, with_mean=True, with_std=True)),
 ('kernel',
  Nystroem(coef0=None, degree=None, gamma=None, kernel='rbf', kernel_params=None,
           n_components=100, random_state=None)),
 ('linear_svm',
  LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
            intercept_scaling=1, loss='squared_hinge', max_iter=1000,
            multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
            verbose=0))]

In [62]:
pipe_svc = Pipeline(pipe_chain)

In [63]:
pipe_svc.fit(X_train, y_train)

Pipeline(memory=None,
         steps=[('sc',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('kernel',
                 Nystroem(coef0=None, degree=None, gamma=None, kernel='rbf',
                          kernel_params=None, n_components=100,
                          random_state=None)),
                ('linear_svm',
                 LinearSVC(C=1.0, class_weight=None, dual=True,
                           fit_intercept=True, intercept_scaling=1,
                           loss='squared_hinge', max_iter=1000,
                           multi_class='ovr', penalty='l2', random_state=None,
                           tol=0.0001, verbose=0))],
         verbose=False)

In [64]:
# pipe_svc.score(X_train)

In [65]:
pipe_svc.score(X_train, y_train)

0.9382833333333334

In [66]:
pipe_svc.score(X_val, y_val)

0.44482421875

In [168]:
df_submit_svm = pd.DataFrame({'label':pipe_svc.predict(X_test)})

df_submit_svm['id'] = list(df_submit_svm.index)

df_submit_svm = df_submit_svm.loc[:,['id','label']]

df_submit_svm

Unnamed: 0,id,label
0,0,3
1,1,0
2,2,2
3,3,6
4,4,7
...,...,...
4995,4995,1
4996,4996,1
4997,4997,1
4998,4998,6


In [169]:
confusion_matrix(y_train, pipe_svc.predict(X_train))

array([[5650,  174,   25,   59,    6,    3,   18,   20,   36,    9],
       [  28, 5756,   29,   27,    7,   20,  105,    7,    6,   15],
       [  23,    6, 5824,    8,    0,    2,  132,    1,    2,    2],
       [  90,   13,    4, 5528,   81,   67,   66,  139,    6,    6],
       [  11,    4,    2,   35, 5780,   99,   11,   16,   14,   28],
       [   0,   12,   39,   46,   92, 5672,   97,   16,   16,   10],
       [   4,    4,    2,   46,    6,    6, 5726,  183,    4,   19],
       [  29,    8,    2,  174,   49,   25,  743, 4954,    5,   11],
       [  62,   12,   22,   13,   28,   11,  101,    0, 5709,   42],
       [   2,    0,    1,    3,   41,    0,  158,   41,   56, 5698]])

In [172]:
precision_score(y_train, pipe_svc.predict(X_train), average='micro')

(0.9382833333333334, 0.9410446580789159)

In [174]:
precision_score(y_val, pipe_svc.predict(X_val), average='micro')

0.44482421875

In [173]:
accuracy_score(y_train, pipe_svc.predict(X_train))

0.9382833333333334

In [178]:
# roc_auc_score(y_val, pipe_svc.predict(X_val), multi_class='ovo')

##### 2) SGDClassifier

In [81]:
pipe_chain = [('sc',StandardScaler()), ('kernel', Nystroem()), ('sgd', SGDClassifier())]

In [82]:
pipe_sgd = Pipeline(pipe_chain)
pipe_sgd.fit(X_train, y_train)
pipe_sgd.score(X_train, y_train)

0.9121333333333334

In [83]:
pipe_sgd.score(X_val, y_val)

0.405078125

In [84]:
df_submit_sgd = pd.DataFrame({'label':pipe_sgd.predict(X_test)})

df_submit_sgd['id'] = list(df_submit_sgd.index)

df_submit_sgd = df_submit_sgd.loc[:,['id','label']]

df_submit_sgd

Unnamed: 0,id,label
0,0,3
1,1,0
2,2,2
3,3,6
4,4,7
...,...,...
4995,4995,1
4996,4996,1
4997,4997,1
4998,4998,7


##### 3) Logistic Regression

In [113]:
pipe_chain = [('sc',StandardScaler()), ('kernel', Nystroem()), ('lr', LogisticRegression(n_jobs=-1, C=100.0, tol=0.001, max_iter=100))]

In [114]:
pipe_lr = Pipeline(pipe_chain)
pipe_lr.fit(X_train, y_train)
pipe_lr.score(X_train, y_train)

0.9457333333333333

In [115]:
pipe_lr.score(X_val, y_val)

0.46484375

In [121]:
pipe_chain = [('sc',StandardScaler()), ('lr', LogisticRegression(n_jobs=-1, C=100.0, tol=0.001, max_iter=10))]

In [122]:
pipe_lr = Pipeline(pipe_chain)
pipe_lr.fit(X_train, y_train)
pipe_lr.score(X_train, y_train)

0.9686833333333333

In [123]:
pipe_lr.score(X_val, y_val)

0.58740234375

##### 4) Ridge

In [133]:
pipe_chain = [('sc',StandardScaler()), ('kernel', Nystroem()), ('ridge', RidgeClassifier(alpha=1000.0))]

In [134]:
pipe_ridge = Pipeline(pipe_chain)
pipe_ridge.fit(X_train, y_train)
pipe_ridge.score(X_train, y_train)

0.8031833333333334

In [135]:
pipe_ridge.score(X_val, y_val)

0.26650390625

##### 5) Bagging

In [145]:
pipe_chain = [('sc',StandardScaler()), ('bag', BaggingClassifier(n_estimators=20, n_jobs=-1))]

In [146]:
pipe_bag = Pipeline(pipe_chain)
pipe_bag.fit(X_train, y_train)
pipe_bag.score(X_train, y_train)

0.99955

In [147]:
pipe_bag.score(X_val, y_val)

0.58291015625

##### 6) AdaBoost

In [165]:
pipe_chain = [('sc',StandardScaler()), ('kernel', Nystroem()), ('ada', AdaBoostClassifier(base_estimator=LogisticRegression(n_jobs=-1, C=100.0, tol=0.001, max_iter=100), n_estimators=20))]

In [166]:
pipe_ada = Pipeline(pipe_chain)
pipe_ada.fit(X_train, y_train)
pipe_ada.score(X_train, y_train)

0.8635166666666667

In [167]:
pipe_ada.score(X_val, y_val)

0.33857421875