# Model Building for MNIST

In [2]:
# Select the GPU Theano should run on, via the sandbox CUDA module.
from theano.sandbox import cuda
# NOTE(review): the device name 'gpu1' is hard-coded — presumably the second GPU
# on the author's machine; adjust it for your hardware.
# NOTE(review): the cell output links to Theano's wiki page on converting to the
# new gpuarray back-end, which suggests this module is deprecated — confirm.
cuda.use('gpu1')

 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29



In [3]:
%matplotlib inline
from importlib import reload
import utils; reload(utils)
from utils import *
from __future__ import division, print_function

Using Theano backend.


## Setup

In [4]:
from keras.datasets import mnist

# Mini-batch size for the notebook.
batch_size = 64

# Fetch MNIST as (images, labels) pairs for the train and test splits.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Show the shape of every array as a quick sanity check.
tuple(arr.shape for arr in (X_train, y_train, X_test, y_test))

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.pkl.gz


((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [5]:
# MNIST images are grey-scale, so the arrays carry no channel axis.
# Insert a length-1 axis at position 1: (N, 28, 28) -> (N, 1, 28, 28).
X_test = X_test[:, np.newaxis]
X_train = X_train[:, np.newaxis]
X_train.shape

(60000, 1, 28, 28)

In [6]:
y_train[:5]

array([5, 0, 4, 1, 9], dtype=uint8)

In [7]:
# Replace the integer class labels with their one-hot encodings
# (train first, then test), and preview the first five training rows.
y_train, y_test = onehot(y_train), onehot(y_test)
y_train[:5]

array([[ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]])

Now, let's normalize the inputs

In [8]:
# Global pixel statistics of the training set, stored as float32 scalars.
# They are computed over every pixel of every training image.
mean_px = np.float32(X_train.mean())
std_px = np.float32(X_train.std())

In [9]:
def norm_input(x):
    """Standardise pixel values with the training-set statistics.

    Subtracts the global `mean_px` from `x` and divides the result by the
    global `std_px`.
    """
    centered = x - mean_px
    return centered / std_px

## Linear model

Why don't we just fine-tune the ImageNet model?

Because the ImageNet model expects 224 x 224 full-color images, whereas here we have 28 x 28 greyscale images.

So we need to start from scratch.

In [15]:
def get_lin_model():
    """Build and compile a linear (softmax-regression) classifier.

    The network normalises the 1x28x28 input with `norm_input`, flattens it
    and feeds it to a single 10-way softmax layer. It is compiled with a
    default `Adam` optimizer, categorical cross-entropy loss and an accuracy
    metric.

    Returns:
        The compiled `Sequential` model.
    """
    model = Sequential()
    model.add(Lambda(norm_input, input_shape=(1, 28, 28)))
    model.add(Flatten())
    model.add(Dense(10, activation='softmax'))
    model.compile(optimizer=Adam(),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


lm = get_lin_model()

  .format(self.name, input_shape))


In [16]:
# No augmentation options are passed; the generator is used here only to
# serve mini-batches from the in-memory arrays.
gen = image.ImageDataGenerator()

# Use the notebook-level `batch_size` (defined in the setup cell) instead of
# repeating the literal 64, so the batch size is controlled from one place.
batches = gen.flow(X_train, y_train, batch_size=batch_size)
test_batches = gen.flow(X_test, y_test, batch_size=batch_size)

In [17]:
# First pass: one epoch with the optimizer's default learning rate,
# validating on the test batches.
lm.fit_generator(
    batches,
    samples_per_epoch=batches.N,
    nb_epoch=1,
    validation_data=test_batches,
    nb_val_samples=test_batches.N,
)

Epoch 1/1


<keras.callbacks.History at 0x7faf22bab400>

It's always recommended to start with a single epoch and a low learning rate. The default for Adam in Keras is 0.001.

In [18]:
from keras import backend as K

# Raise the learning rate to 0.1 by updating the optimizer's backend variable
# in place. Rebinding `lm.optimizer.lr` to a plain float would not be seen by
# the training function that the previous fit already compiled.
K.set_value(lm.optimizer.lr, 0.1)
lm.fit_generator(batches, batches.N, nb_epoch=3,
                validation_data=test_batches, nb_val_samples=test_batches.N)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7faf22bab7b8>

## Single Dense Layer

In [19]:
def get_fc_model():
    """Build and compile a classifier with one fully connected hidden layer.

    The network normalises the 1x28x28 input with `norm_input`, flattens it,
    applies a 512-unit hidden layer and ends in a 10-way softmax output. It is
    compiled with a default `Adam` optimizer, categorical cross-entropy loss
    and an accuracy metric.

    Returns:
        The compiled `Sequential` model.
    """
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Flatten(),
        # Hidden layer uses ReLU: softmax belongs only on the output layer,
        # since it forces the 512 activations to sum to 1 and squashes them.
        Dense(512, activation='relu'),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

fc = get_fc_model()

  .format(self.name, input_shape))


As before, let's start with 1 epoch and a default low learning rate.

In [21]:
# First pass: one epoch with the optimizer's default learning rate,
# validating on the test batches.
fc.fit_generator(
    batches,
    samples_per_epoch=batches.N,
    nb_epoch=1,
    validation_data=test_batches,
    nb_val_samples=test_batches.N,
)

Epoch 1/1


<keras.callbacks.History at 0x7faf2218dc50>

In [22]:
from keras import backend as K

# Set the learning rate to 0.01 by updating the optimizer's backend variable
# in place. Rebinding `fc.optimizer.lr` to a plain float would not be seen by
# the training function that the previous fit already compiled.
K.set_value(fc.optimizer.lr, 0.01)
fc.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7faf2218dd68>

## Basic 'VGG-style' CNN

In [None]:
def get_model():
    """Build and compile a small VGG-style CNN for 1x28x28 inputs.

    Architecture: input normalisation, two 3x3 convolutions with 32 filters
    plus max-pooling, two 3x3 convolutions with 64 filters plus max-pooling,
    then a 512-unit ReLU dense layer and a 10-way softmax output. Compiled
    with a default `Adam` optimizer, categorical cross-entropy loss and an
    accuracy metric.

    Returns:
        The compiled `Sequential` model.
    """
    def conv_block(nb_filter):
        # Two 3x3 ReLU convolutions followed by a default max-pooling layer.
        return [
            Convolution2D(nb_filter, 3, 3, activation='relu'),
            Convolution2D(nb_filter, 3, 3, activation='relu'),
            MaxPooling2D(),
        ]

    layers = (
        [Lambda(norm_input, input_shape=(1, 28, 28))]
        + conv_block(32)
        + conv_block(64)
        + [Flatten(),
           Dense(512, activation='relu'),
           Dense(10, activation='softmax')]
    )
    model = Sequential(layers)
    model.compile(optimizer=Adam(),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [24]:
# Instantiate the CNN and run a first epoch with the optimizer's default
# learning rate, validating on the test batches.
model = get_model()
model.fit_generator(
    batches,
    samples_per_epoch=batches.N,
    nb_epoch=1,
    validation_data=test_batches,
    nb_val_samples=test_batches.N,
)

  .format(self.name, input_shape))


Epoch 1/1


<keras.callbacks.History at 0x7faf15ff2da0>

In [25]:
from keras import backend as K

# Raise the learning rate to 0.1 by updating the optimizer's backend variable
# in place. Rebinding `model.optimizer.lr` to a plain float would not be seen
# by the training function that the previous fit already compiled.
K.set_value(model.optimizer.lr, 0.1)
model.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

Epoch 1/1


<keras.callbacks.History at 0x7faf22b3d7b8>

In [26]:
from keras import backend as K

# Lower the learning rate to 0.01 for the longer training run.
current_lr = model.optimizer.lr
if hasattr(current_lr, 'set_value'):
    # `lr` is still the backend variable read by the compiled training
    # function, so update it in place for the change to take effect.
    K.set_value(current_lr, 0.01)
else:
    # `lr` has already been rebound to a plain number, so there is no backend
    # variable left to update; fall back to plain attribute assignment.
    model.optimizer.lr = 0.01
model.fit_generator(batches, batches.N, nb_epoch=8, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7faf22b3d4a8>