In [11]:
%load_ext autoreload
%autoreload 2
import pickle
import gzip
import numpy as np
import os
import sys
# Search "../../" first when resolving imports, so the `minidl` imported on the
# next line is presumably the in-tree checkout two directories up rather than
# an installed copy (confirm against the repository layout).
sys.path.insert(0,"../../")
import minidl as mdl
# Directory the kernel was started in; passed to prepare_data() as the
# download location for the MNIST archive.
workdir = os.getcwd()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Preparing the data

In [12]:
def prepare_data(data_url, workdir):
    """Download the pickled MNIST archive and export it as four CSV files.

    The archive is fetched with ``mdl.utils.download`` to
    ``<workdir>/mnist.pkl.gz`` and unpickled into three splits
    (training, validation, test), each indexed as ``split[0]`` = images and
    ``split[1]`` = labels. Images are multiplied by 256 and truncated to
    int32; labels are cast to int32. The training and validation splits are
    concatenated into a single training set.

    The following comma-delimited integer files are written to the *current
    working directory* (not ``workdir``), one sample per row:
    ``train_label.csv``, ``test_label.csv``, ``train_image.csv``,
    ``test_image.csv``.

    Args:
        data_url: URL of the gzip-compressed pickle to download.
        workdir: Directory in which the downloaded archive is stored.

    Returns:
        None.
    """
    archive_path = os.path.join(workdir, "mnist.pkl.gz")
    mdl.utils.download(data_url, archive_path)

    # SECURITY: unpickling can execute arbitrary code embedded in the file, so
    # data_url must point at a trusted source.
    # The context manager closes the archive even if unpickling raises.
    # encoding="latin1" is the setting Python 3 needs to read NumPy arrays
    # pickled by Python 2 -- presumably how this archive was produced.
    with gzip.open(archive_path, 'rb') as f:
        training_data, validation_data, test_data = pickle.load(f, encoding="latin1")

    def _to_int_pixels(images):
        # Scale the stored pixel values by 256 and truncate to integers.
        # NOTE(review): assumes the archive stores intensities as k/256
        # floats so this recovers 0..255 -- confirm against the data.
        return np.int32(images * 256)

    training_img = _to_int_pixels(training_data[0])
    training_label = np.int32(training_data[1])
    validation_img = _to_int_pixels(validation_data[0])
    validation_label = np.int32(validation_data[1])
    test_img = _to_int_pixels(test_data[0])
    test_label = np.int32(test_data[1])

    # Fold the validation split into the training set.
    tr_img = np.concatenate((training_img, validation_img))
    tr_label = np.concatenate((training_label, validation_label))

    # One label per row. The row count is inferred (-1) instead of being
    # hard-coded to 60000 / 10000, so archives of any size are handled.
    tr_label = np.reshape(tr_label, (-1, 1))
    test_label = np.reshape(test_label, (-1, 1))

    np.savetxt('train_label.csv', tr_label, delimiter=',', fmt='%d')
    np.savetxt('test_label.csv', test_label, delimiter=',', fmt='%d')
    np.savetxt('train_image.csv', tr_img, delimiter=',', fmt='%d')
    np.savetxt('test_image.csv', test_img, delimiter=',', fmt='%d')

In [13]:
# Gzip-compressed MNIST pickle hosted in the mnielsen/neural-networks-and-deep-learning
# GitHub repository.
data_url = "https://raw.githubusercontent.com/mnielsen/neural-networks-and-deep-learning/master/data/mnist.pkl.gz"
# Downloads the archive into workdir and writes the four CSV files that
# load_data() reads from the current directory.
prepare_data(data_url, workdir)

Downloading https://raw.githubusercontent.com/mnielsen/neural-networks-and-deep-learning/master/data/mnist.pkl.gz to /Users/yongshengli/Documents/codes/minidl/examples/mnist


## Preprocessing the data

In [14]:
def load_data():
    """Read the four MNIST CSV files from the current directory.

    The files are parsed with ``np.genfromtxt`` in this order:
    ``train_image.csv``, ``train_label.csv``, ``test_image.csv``,
    ``test_label.csv``. Both image arrays are divided by 255.0 in place;
    the label arrays are returned exactly as parsed.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``.
    """
    csv_names = (
        "train_image.csv",
        "train_label.csv",
        "test_image.csv",
        "test_label.csv",
    )
    # The generator is consumed left to right, so files are read in the
    # order listed above.
    train_pixels, train_targets, test_pixels, test_targets = (
        np.genfromtxt(csv_name, delimiter=",") for csv_name in csv_names
    )

    # Scale the pixel columns only; labels are left untouched.
    for pixels in (train_pixels, test_pixels):
        pixels /= 255.0

    return train_pixels, train_targets, test_pixels, test_targets

## Create a data generator for batch training

In [15]:
# Read the CSV files written by prepare_data(); load_data() has already
# divided the image arrays by 255.0.
x_train, y_train, x_test, y_test = load_data()
# Labels are one-hot encoded over 10 classes, and both arrays are transposed
# before being handed to the generator -- presumably minidl expects samples
# along the second axis (features, samples); TODO confirm against DataGenerator.
train_gen = mdl.datagen.DataGenerator(x_train.T, mdl.utils.to_onehot(y_train, label_num=10).T)

## Construct and train the model

In [16]:
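# NOTE: disabled alternative model definition (Layer / NeutralNetwork API with keep_prob=0.7 on the hidden layers).
# Nothing in this cell is executed; the next cell builds and trains the model used in the rest of the notebook.
# layers = [mdl.layer.Layer(28*28, 128, activation=mdl.math.sigmoid, activation_dt=mdl.math.sigmoid_dt, keep_prob=0.7),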
#           mdl.layer.Layer(128, 64, activation=mdl.math.sigmoid, activation_dt=mdl.math.sigmoid_dt, keep_prob=0.7),
#           mdl.layer.Layer(64, 10, activation=mdl.math.softmax, activation_dt=mdl.math.softmax_dt)]
# optimizer = mdl.optimizer.Adam()
# network = mdl.network.NeutralNetwork(layers, optimizer)
# model = mdl.model.Model(network)
# model.train(train_gen, batch_size=64, max_epoch=5)

In [17]:
# Fully connected classifier: 784 -> 128 -> 64 -> 10.
# Weight shapes are given as (out_features, in_features) and biases as
# (out_features, 1); weights use He initialization, biases start at zero.
# NOTE(review): no random seed is set anywhere in this notebook, so if the
# initializer or the data generator draw from an unseeded RNG the numbers
# below will differ between runs -- confirm and seed if reproducibility matters.
init = mdl.initializer.Initializer()
net = mdl.network.Sequential(layers=[
    mdl.layer.Dense(w = init.He(shape=(128, 28*28)), b=init.zeros(shape=(128, 1))),
    mdl.layer.Sigmoid(),
    mdl.layer.Dense(w = init.He(shape=(64, 128)), b=init.zeros(shape=(64, 1))),
    mdl.layer.Sigmoid(),
    mdl.layer.Dense(w = init.He(shape=(10, 64)), b=init.zeros(shape=(10, 1))),
    mdl.layer.Softmax()
])
# Cross-entropy loss on the softmax output, optimized with Adam (library defaults).
model = mdl.model.Model(net, lossfn=mdl.loss.CrossEntropyLoss(), optimizer=mdl.optimizer.Adam())
# Progress is requested every 1000 steps (print_n=1000, print_type="step").
# NOTE(review): x_val / y_val are the *test* set, so the reported val_accuracy
# is test-set accuracy, not a held-out validation score (prepare_data() merges
# the original validation split into the training data).
model.train(train_gen, batch_size=64, max_epoch=10, print_n=1000, print_type="step", x_val=x_test, y_val=y_test)

Total steps:  9380
[ Epoch 0002, Step 00001000 ] loss: 0.307173, val_accuracy: 0.930300
[ Epoch 0003, Step 00002000 ] loss: 0.106331, val_accuracy: 0.950300
[ Epoch 0004, Step 00003000 ] loss: 0.155403, val_accuracy: 0.958000
[ Epoch 0005, Step 00004000 ] loss: 0.076921, val_accuracy: 0.966700
[ Epoch 0006, Step 00005000 ] loss: 0.031734, val_accuracy: 0.969600
[ Epoch 0007, Step 00006000 ] loss: 0.081209, val_accuracy: 0.971500
[ Epoch 0008, Step 00007000 ] loss: 0.035783, val_accuracy: 0.974700
[ Epoch 0009, Step 00008000 ] loss: 0.024468, val_accuracy: 0.974600
[ Epoch 0010, Step 00009000 ] loss: 0.032426, val_accuracy: 0.978400


## Evaluate the model on the training and test data

In [18]:
# Score the trained model on the data it was fitted on; the result of
# predict() is unpacked as (accuracy, predictions).
train_acc, train_preds = model.predict(x_train, y_train)

In [19]:
# Score the trained model on the test split.
# NOTE(review): this same split was passed as x_val / y_val during training.
test_acc, test_preds = model.predict(x_test, y_test)
# Write the predictions, formatted as integers, to test_predictions.csv in
# the current directory.
np.savetxt("test_predictions.csv", test_preds, fmt='%i', delimiter=",")

In [20]:
# Report both accuracies on one line so the train/test gap is easy to see.
print(f"train_accuracy: {train_acc}, test_accuracy: {test_acc}")

train_accuracy: 0.9922333333333333, test_accuracy: 0.9763
