In [1]:
import numpy as np
import matplotlib.pyplot as plt
import sklearn
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from pathlib import Path
import urllib
import gzip
import shutil
import struct
import pickle

# Download and Unzip

In [2]:
root_path = Path()
train_data_path = root_path / "train_data"

In [3]:
train_img_url = "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz"
trian_label_url = "http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz"
test_img_url = "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz"
test_label_url = "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz"
urls = [train_img_url, trian_label_url, test_img_url, test_label_url]

train_img_gz_name = "train-images-idx3-ubyte.gz"
train_label_gz_name = "train-labels-idx1-ubyte.gz"
test_img_gz_name = "t10k-images-idx3-ubyte.gz"
test_label_gz_name = "t10k-labels-idx1-ubyte.gz"

gz_paths = [train_data_path / train_img_gz_name, 
            train_data_path / train_label_gz_name, 
            train_data_path / test_img_gz_name, 
            train_data_path / test_label_gz_name,
           ]

unzip_paths = [train_data_path / train_img_gz_name.split(".")[0], 
            train_data_path / train_label_gz_name.split(".")[0], 
            train_data_path / test_img_gz_name.split(".")[0], 
            train_data_path / test_label_gz_name.split(".")[0],
           ]

In [4]:
if not train_data_path.exists():
    train_data_path.mkdir()

In [5]:
def download(filepath, url):
    if not filepath.exists():
        print(f"Downloading {filepath} from {url}")
        urllib.request.urlretrieve(url, filepath)

In [6]:
for gz_path, url in zip(gz_paths, urls):
    download(gz_path,  url)

In [7]:
def unzip_gz(input_path, output_path):
    with gzip.open(input_path, 'rb') as f_in:
        with open(output_path, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)

In [8]:
for gz_path, unzip_path in zip(gz_paths, unzip_paths):
    unzip_gz(gz_path, unzip_path)

# Read Train Test Data to Array

In [9]:
# https://gist.github.com/tylerneylon/ce60e8a06e7506ac45788443f7269e40
def read_idx(filepath):
    '''
    Function to read mnist idx format as array
    '''
    with open(filepath, 'rb') as f:
        zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        shape = tuple(struct.unpack('>I', f.read(4))[0] for d in range(dims))
        return np.frombuffer(f.read(), dtype=np.uint8).reshape(shape)

In [10]:
# Read data as np array
X_train = read_idx(unzip_paths[0])
y_train = read_idx(unzip_paths[1])
X_test = read_idx(unzip_paths[2])
y_test = read_idx(unzip_paths[3])

In [11]:
# Flatten input
X_train = X_train.reshape(-1, 28*28)
X_test = X_test.reshape(-1, 28*28)

In [12]:
# Normalize
X_train = X_train / 255
X_test = X_test / 255

In [13]:
print(
    f"X_train shape: \t{X_train.shape}\n"+
    f"y_train shape: \t{y_train.shape}\n"+
    f"X_test shape: \t{X_test.shape}\n"+
    f"y_test shape: \t{y_test.shape}"
)

X_train shape: 	(60000, 784)
y_train shape: 	(60000,)
X_test shape: 	(10000, 784)
y_test shape: 	(10000,)


# Train

In [14]:
mlp_classifier = MLPClassifier(hidden_layer_sizes=(100,100,), 
                               activation='relu', 
                               solver='adam', 
                               alpha=0.0001, 
                               max_iter=200, 
                               verbose=False,
                               learning_rate_init=0.001,)

In [15]:
%%time
mlp_classifier.fit(X_train, y_train)

Wall time: 37.3 s


MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 100), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [16]:
test_accuracy = accuracy_score(y_test, mlp_classifier.predict(X_test))
print(f"Accuracy: {test_accuracy}")

Accuracy: 0.982


In [17]:
class_report = classification_report(y_test, mlp_classifier.predict(X_test))
print(class_report)

              precision    recall  f1-score   support

           0       0.99      0.99      0.99       980
           1       0.99      0.99      0.99      1135
           2       0.98      0.98      0.98      1032
           3       0.98      0.98      0.98      1010
           4       0.98      0.98      0.98       982
           5       0.98      0.98      0.98       892
           6       0.98      0.98      0.98       958
           7       0.98      0.98      0.98      1028
           8       0.98      0.98      0.98       974
           9       0.97      0.98      0.97      1009

   micro avg       0.98      0.98      0.98     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.98      0.98      0.98     10000



# Save Model

In [23]:
model_path = root_path / "model" / "mlp_model.pkl"
with open(model_path, 'wb') as f:
    pickle.dump(mlp_classifier, f)

# Test Loading Model and Evaluate

In [24]:
with open(model_path, 'rb') as f:
    clf = pickle.load(f)

In [25]:
clf_accuracy = accuracy_score(y_test, clf.predict(X_test))
print(f"Accuracy: {clf_accuracy}")

Accuracy: 0.982
