# Very deep networks with repeating elements


## 



## VGG


In [11]:
from __future__ import print_function
import mxnet as mx
from mxnet import nd, autograd
from mxnet import gluon
import numpy as np
mx.random.seed(1)

In [12]:
# Device on which parameters are allocated and batches are processed:
# the first GPU (index 0).
ctx = mx.gpu(0)

## Load up a dataset


In [13]:
def transformer(data, label):
    """Prepare one sample for the network.

    The image is resized to 224x224, its third axis is moved to the front,
    and the result is cast to float32.  The label is passed through untouched.

    Parameters
    ----------
    data : image array accepted by ``mx.image.imresize``
        Presumably height x width x channel, as the (2, 0, 1) transpose
        suggests -- confirm against the dataset.
    label : any
        Returned unchanged.

    Returns
    -------
    tuple
        ``(float32 array with the former last axis first, label)``.
    """
    resized = mx.image.imresize(data, 224, 224)
    channels_first = mx.nd.transpose(resized, (2, 0, 1))
    return channels_first.astype(np.float32), label


In [14]:
batch_size = 64

# CIFAR10 splits stored under ./data; every sample goes through `transformer`.
cifar_train = gluon.data.vision.CIFAR10('./data', train=True, transform=transformer)
cifar_test = gluon.data.vision.CIFAR10('./data', train=False, transform=transformer)

# Training batches are reshuffled; a trailing partial batch is dropped.
train_data = gluon.data.DataLoader(
    cifar_train, batch_size=batch_size, shuffle=True, last_batch='discard')

# Test batches keep dataset order.
# NOTE(review): 'discard' also drops the trailing partial test batch, so any
# metric computed from this loader skips those samples -- confirm intended.
test_data = gluon.data.DataLoader(
    cifar_test, batch_size=batch_size, shuffle=False, last_batch='discard')

In [15]:
# Pull a single (data, label) mini-batch from the training loader to inspect.
d, l = next(iter(train_data))

In [16]:
# Show the shapes of the sampled data batch and of its label vector.
print(d.shape, l.shape)

(64, 3, 224, 224) (64,)


In [17]:
# Notebook display expression: element type of the sampled data batch.
d.dtype

numpy.float32

## The VGG architecture


In [18]:
def add_vgg_block(net, convs, channels, padding=1):
    """Append one VGG block to ``net`` in place.

    A block is ``convs`` 3x3 ReLU convolutions, each producing ``channels``
    output channels, followed by a 2x2 max-pooling layer with stride 2.

    Parameters
    ----------
    net : container with an ``add`` method (e.g. ``gluon.nn.Sequential``)
        Receives the new layers; it is mutated and nothing is returned.
    convs : int
        Number of convolution layers in the block.
    channels : int
        Output channels of every convolution in the block.
    padding : int, optional
        Zero-padding applied on each side by every convolution.  The default
        of 1 keeps the spatial size unchanged through a 3x3 convolution, as
        the VGG architecture specifies, so only the pooling layer halves the
        resolution.  Pass 0 for unpadded convolutions, which lose 2 pixels
        per layer.
    """
    for _ in range(convs):
        net.add(gluon.nn.Conv2D(channels=channels, kernel_size=3,
                                padding=padding, activation='relu'))
    net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))


In [19]:
num_classes = 10

# (number of convolutions, output channels) for each of the five
# convolutional blocks, in the order they are stacked.
vgg_blocks = ((2, 64), (2, 128), (3, 256), (3, 512), (3, 512))

vgg_net = gluon.nn.Sequential()
with vgg_net.name_scope():
    # Convolutional feature extractor: five blocks of growing width.
    for num_convs, num_channels in vgg_blocks:
        add_vgg_block(vgg_net, num_convs, num_channels)

    # Classifier head: flatten, two 4096-unit ReLU layers, then one output
    # unit per class.
    vgg_net.add(gluon.nn.Flatten())
    for hidden_units in (4096, 4096):
        vgg_net.add(gluon.nn.Dense(hidden_units, activation="relu"))
    vgg_net.add(gluon.nn.Dense(num_classes))


## Initialize parameters

In [20]:
# Allocate every parameter of the network on `ctx`, using Xavier
# initialisation with magnitude 2.24.
vgg_params = vgg_net.collect_params()
vgg_params.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

## Optimizer

In [21]:
# Stochastic gradient descent over all network parameters.
sgd_settings = {'learning_rate': .001}
trainer = gluon.Trainer(vgg_net.collect_params(), 'sgd', sgd_settings)

## Softmax cross-entropy loss

In [22]:
# Loss object used by the training loop; it is called as loss(output, label).
loss = gluon.loss.SoftmaxCrossEntropyLoss()

## Evaluation loop

In [None]:
def evaluate_accuracy(data_iterator, net):
    """Return the accuracy of ``net`` over every batch of ``data_iterator``.

    Parameters
    ----------
    data_iterator : iterable of ``(data, label)`` pairs
        Each element is moved to the module-level ``ctx`` before use.
    net : callable
        Maps a data batch to per-class scores; the predicted class is the
        argmax along axis 1.

    Returns
    -------
    The value part of ``mx.metric.Accuracy().get()`` after all batches have
    been accumulated.
    """
    metric = mx.metric.Accuracy()
    for batch, targets in data_iterator:
        scores = net(batch.as_in_context(ctx))
        metric.update(preds=nd.argmax(scores, axis=1),
                      labels=targets.as_in_context(ctx))
    _, accuracy = metric.get()
    return accuracy

## Training loop

In [None]:
epochs = 10

for e in range(epochs):
    # Exponentially smoothed batch loss; it restarts from zero each epoch, so
    # the first few hundred updates of an epoch under-report the true loss.
    moving_loss = 0.
    for d, l in train_data:
        data = d.as_in_context(ctx)
        label = l.as_in_context(ctx)

        # Record the forward pass so gradients can be computed afterwards.
        with autograd.record():
            output = vgg_net(data)
            cross_entropy = loss(output, label)
        cross_entropy.backward()

        # Pass the batch size so the update is normalised per sample.
        trainer.step(data.shape[0])

        moving_loss = .99 * moving_loss + .01 * nd.mean(cross_entropy).asscalar()

    # Evaluate the network that was just trained.  The original evaluated
    # `alex_net`, a name this notebook never defines (NameError).
    test_accuracy = evaluate_accuracy(test_data, vgg_net)
    train_accuracy = evaluate_accuracy(train_data, vgg_net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" % (e, moving_loss, train_accuracy, test_accuracy))