In [1]:
import mxnet as mx
from mxnet import nd
from mxnet import autograd
from mxnet import gluon

In [2]:
ctx = mx.cpu()

In [3]:
mnist = mx.test_utils.get_mnist()
batch_size = 64
num_inputs = 784
num_outputs = 10
train_data = mx.io.NDArrayIter(mnist["train_data"], mnist["train_label"],
                               batch_size, shuffle=True)
test_data = mx.io.NDArrayIter(mnist["test_data"], mnist["test_label"],
                              batch_size, shuffle=True)

In [4]:
num_hidden = 256
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(num_inputs, activation="relu"))
    net.add(gluon.nn.Dense(num_hidden, activation="relu"))
    net.add(gluon.nn.Dense(num_outputs))

In [5]:
net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

In [6]:
loss = gluon.loss.SoftmaxCrossEntropyLoss()

In [7]:
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})


In [8]:
def evaluate_accuracy(data_iterator, net):
    acc = mx.metric.Accuracy()
    data_iterator.reset()
    for i, batch in enumerate(data_iterator):
        data = batch.data[0].as_in_context(ctx).reshape((-1, 784))
        label = batch.label[0].as_in_context(ctx)
        output = net(data)
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]

In [9]:
epochs = 10
moving_loss = 0.

for e in range(epochs):
    train_data.reset()
    for i, batch in enumerate(train_data):
        data = batch.data[0].as_in_context(ctx).reshape((-1, 784))
        label = batch.label[0].as_in_context(ctx)
        with autograd.record():
            output = net(data)
            cross_entropy = loss(output, label)
            cross_entropy.backward()
        trainer.step(data.shape[0])

        ##########################
        #  Keep a moving average of the losses
        ##########################
        if i == 0:
            moving_loss = nd.mean(cross_entropy).asscalar()
        else:
            moving_loss = .99 * moving_loss + .01 * nd.mean(cross_entropy).asscalar()

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, moving_loss, train_accuracy, test_accuracy))

Epoch 0. Loss: 0.18577311078, Train_acc 0.952891791045, Test_acc 0.951632165605
Epoch 1. Loss: 0.112965056105, Train_acc 0.97099880064, Test_acc 0.964470541401
Epoch 2. Loss: 0.0803393790788, Train_acc 0.978294909382, Test_acc 0.970939490446
Epoch 3. Loss: 0.0605976452684, Train_acc 0.983608742004, Test_acc 0.973228503185
Epoch 4. Loss: 0.0464805106061, Train_acc 0.987273454158, Test_acc 0.976015127389
Epoch 5. Loss: 0.0356949582383, Train_acc 0.989488939232, Test_acc 0.975915605096
Epoch 6. Loss: 0.0271811785094, Train_acc 0.99172108209, Test_acc 0.976512738854
Epoch 7. Loss: 0.0206068861868, Train_acc 0.994153118337, Test_acc 0.977707006369
Epoch 8. Loss: 0.0155883350472, Train_acc 0.996002132196, Test_acc 0.978602707006
Epoch 9. Loss: 0.011715119706, Train_acc 0.996934968017, Test_acc 0.97840366242
