# MNIST

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_mldata

In [2]:
mnist = fetch_mldata("MNIST original")

In [3]:
mnist

{'DESCR': 'mldata.org dataset: mnist-original',
 'COL_NAMES': ['label', 'data'],
 'target': array([0., 0., 0., ..., 9., 9., 9.]),
 'data': array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)}

In [4]:
X, y = mnist['data'], mnist['target']

In [5]:
X.shape

(70000, 784)

In [6]:
X_train = np.array(X[:60000], dtype=float)
y_train = np.array(y[:60000], dtype=float)
X_test = np.array(X[60000:], dtype=float)
y_test = np.array(y[60000:], dtype=float)

In [7]:
X_train.shape

(60000, 784)

In [8]:
X_test.shape

(10000, 784)

# just use kNN

In [9]:
from sklearn.neighbors import KNeighborsClassifier

In [10]:
knn_clf = KNeighborsClassifier()
%time knn_clf.fit(X_train, y_train)

CPU times: user 29.6 s, sys: 306 ms, total: 29.9 s
Wall time: 30.7 s


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=5, p=2,
           weights='uniform')

In [11]:
%time knn_clf.score(X_test, y_test)

CPU times: user 10min 23s, sys: 2.05 s, total: 10min 25s
Wall time: 10min 31s


0.9688

# use kNN with PCA

In [13]:
from sklearn.decomposition import PCA

pca = PCA(0.9)
pca.fit(X_train)
X_train_reduction = pca.transform(X_train)

In [14]:
X_train_reduction.shape

(60000, 87)

In [15]:
knn_clf = KNeighborsClassifier()
%time knn_clf.fit(X_train_reduction, y_train)

CPU times: user 317 ms, sys: 5.31 ms, total: 323 ms
Wall time: 323 ms


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=5, p=2,
           weights='uniform')

In [16]:
X_test_reduction = pca.transform(X_test)

In [17]:
%time knn_clf.score(X_test_reduction, y_test)

CPU times: user 1min 15s, sys: 469 ms, total: 1min 15s
Wall time: 1min 17s


0.9728