And now our final model, VGG16 in Gluon

In [1]:
import numpy as np
import mxnet as mx
from mxnet import nd, autograd, gluon
from time import time

  from ._conv import register_converters as _register_converters


In [2]:
num_outputs = 10 # 10 output digits
batch_size = 128 # mini batch
epochs = 10 # total training loops
learning_rate = 0.01 # amount we update parameters

CIFAR images again. Note the need to transform so we are (color channel, x, y).

In [3]:
def transform(data, label):
    data = mx.nd.transpose(data, (2,0,1))
    data = data.astype(np.float32) / 255.0
    return data, label
train_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.CIFAR10(train=True, transform=transform),
                                      batch_size, shuffle=True)
test_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.CIFAR10(train=False, transform=transform),
                                     batch_size, shuffle=False)

The parameters for the VGG blocks, 5 blocks in all.

In [4]:
kernels = [3, 3, 3, 3, 3]
filters = [64, 128, 256, 512, 512]
repeats = [2, 2, 3, 3, 3]
pooling = [2, 2, 2, 2, 2]
strides = [2, 2, 2, 2, 2]
dense_units = [4096, 4096]

Again --  you don't have a final softmax *layer*, MxNet handles he softmax inside this loss function, so the output is a straight linear mapping -- no activation function.


In [5]:
vgg16 = gluon.nn.HybridSequential()
with vgg16.name_scope():
    for kernel, filter, pool, stride, repeat in zip(kernels, filters, pooling, strides, repeats):
        for _ in range(0, repeat):
            vgg16.add(gluon.nn.Conv2D(channels=filter, 
                                    kernel_size=kernel,
                                    padding=(kernel//2),
                                    activation='relu'))
        vgg16.add(gluon.nn.MaxPool2D(pool_size=pool, 
                                       strides=stride,
                                       padding=(kernel//2)))

    vgg16.add(gluon.nn.Flatten())

    for units in dense_units:
        vgg16.add(gluon.nn.Dense(units, activation='relu'))
    
    vgg16.add(gluon.nn.Dense(num_outputs))

Gluon requires a context, either `cpu` or `gpu`. You can change this to `cpu` if needed.

In [6]:
ctx = mx.gpu()

Parameter initialization.

In [7]:
vgg16.collect_params().initialize(mx.init.Xavier(), ctx=ctx)

Now let't take a look at the resulting network. We need to feed in a sample batch to infer the network size.

In [8]:
for i, (d, l) in enumerate(train_data):
    print(vgg16.summary(d.as_in_context(ctx)))
    break

--------------------------------------------------------------------------------
        Layer (type)                                Output Shape         Param #
               Input                            (128, 3, 32, 32)               0
        Activation-1   <Symbol hybridsequential0_conv0_relu_fwd>               0
        Activation-2                           (128, 64, 32, 32)               0
            Conv2D-3                           (128, 64, 32, 32)            1792
        Activation-4   <Symbol hybridsequential0_conv1_relu_fwd>               0
        Activation-5                           (128, 64, 32, 32)               0
            Conv2D-6                           (128, 64, 32, 32)           36928
         MaxPool2D-7                           (128, 64, 17, 17)               0
        Activation-8   <Symbol hybridsequential0_conv2_relu_fwd>               0
        Activation-9                          (128, 128, 17, 17)               0
           Conv2D-10        

If you network doesn't change shape, you can `hybridize` it, which makes Gluon run in a precomplied mode much like Keras.

In [9]:
vgg16.hybridize()

And as always, learning is done with an optimizer and a loss function, learning a classifier with categorical cross entropy.

In [10]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(vgg16.collect_params(), 'sgd', {'learning_rate': learning_rate})

Accuracy!

In [11]:
def evaluate_accuracy(data_iterator, net):
    acc = mx.metric.Accuracy()
    for i, (data, label) in enumerate(data_iterator):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        output = net(data)
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]

And the training loop. 

In [12]:
smoothing_constant = .01
moving_loss = 0.0

for e in range(epochs):
    start = time()
    for i, (d, l) in enumerate(train_data):
        data = d.as_in_context(ctx)
        label = l.as_in_context(ctx)
        with autograd.record():
            output = vgg16(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])

        #  Keep a moving average of the losses
        curr_loss = nd.mean(loss).asscalar()
        moving_loss = (curr_loss if ((i == 0) and (e == 0))
                       else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)
    elapsed = time() - start

    test_accuracy = evaluate_accuracy(test_data, vgg16)
    train_accuracy = evaluate_accuracy(train_data, vgg16)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s, Time %f" % (e, moving_loss, train_accuracy, test_accuracy, elapsed))

Epoch 0. Loss: 2.3023775815029266, Train_acc 0.13882, Test_acc 0.1424, Time 25.382986
Epoch 1. Loss: 2.302042154423849, Train_acc 0.12938, Test_acc 0.1307, Time 24.062614
Epoch 2. Loss: 2.3013975877078985, Train_acc 0.18008, Test_acc 0.1877, Time 24.478345
Epoch 3. Loss: 2.2999088895211575, Train_acc 0.15604, Test_acc 0.1578, Time 24.127373
Epoch 4. Loss: 2.295473200023417, Train_acc 0.17238, Test_acc 0.1762, Time 24.556334
Epoch 5. Loss: 2.2741609553267152, Train_acc 0.18428, Test_acc 0.1857, Time 24.191865
Epoch 6. Loss: 2.1467806422007767, Train_acc 0.21324, Test_acc 0.218, Time 24.074978
Epoch 7. Loss: 2.0475947909910936, Train_acc 0.23612, Test_acc 0.2464, Time 24.471702
Epoch 8. Loss: 1.9994886837158794, Train_acc 0.28078, Test_acc 0.2859, Time 24.481846
Epoch 9. Loss: 1.9556362338472784, Train_acc 0.2652, Test_acc 0.2716, Time 24.459939
