In [1]:
# from __future__ import print_function
import mxnet as mx
from mxnet import nd, autograd
from mxnet import gluon
import numpy as np

In [32]:
# Hyper-parameters shared by the cells below.
num_outputs = 10  # width of the network's final Dense layer
batch_size = 64   # number of samples per mini-batch given to the DataLoaders

In [30]:
def transform(data, label):
    """Per-sample preprocessing hook passed to the dataset.

    ``data`` is cast to float32, its axes are permuted with ``(2, 0, 1)`` and
    the result is divided by 255; ``label`` is cast to float32.
    Presumably this turns an HWC 8-bit image into a CHW image scaled to
    [0, 1] -- TODO confirm against the dataset's raw sample layout.

    Returns:
        The ``(data, label)`` pair after conversion.
    """
    pixels = data.astype(np.float32)
    reordered = nd.transpose(pixels, (2, 0, 1))
    return reordered / 255, label.astype(np.float32)

In [33]:
# Mini-batch iterators over the MNIST train/test splits. Every sample goes
# through `transform`; only the training split is shuffled.
train_data = gluon.data.DataLoader(
    gluon.data.vision.MNIST(train=True, transform=transform),
    batch_size=batch_size,
    shuffle=True)
test_data = gluon.data.DataLoader(
    gluon.data.vision.MNIST(train=False, transform=transform),
    batch_size=batch_size,
    shuffle=False)

In [34]:
num_fc = 512  # units in the hidden fully connected layer

# Two convolution + max-pooling stages followed by two dense layers.
net = gluon.nn.Sequential()
with net.name_scope():
    # The layers are constructed inside the name scope, in stacking order,
    # and then appended one at a time.
    layers = [
        gluon.nn.Conv2D(channels=20, kernel_size=5, activation='relu'),
        gluon.nn.MaxPool2D(pool_size=2, strides=2),
        gluon.nn.Conv2D(channels=50, kernel_size=5, activation='relu'),
        gluon.nn.MaxPool2D(pool_size=2, strides=2),
        gluon.nn.Dense(num_fc, activation='relu'),
        # Output layer: one unit per class, no activation.
        gluon.nn.Dense(num_outputs),
    ]
    for layer in layers:
        net.add(layer)

In [36]:
# Device context used by the rest of the notebook; here everything runs on the CPU.
ctx = mx.cpu()

In [37]:
# Initialise every parameter of `net` on `ctx` with Xavier (magnitude 2.24).
net.collect_params().initialize(init=mx.init.Xavier(magnitude=2.24), ctx=ctx)

# Loss used for training.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

# Plain SGD over all network parameters with a learning rate of 0.1.
trainer = gluon.Trainer(
    params=net.collect_params(),
    optimizer='sgd',
    optimizer_params={'learning_rate': 0.1})

In [38]:
def evaluate_accuracy(data_iterator, net):
    """Measure the accuracy of ``net`` over every batch in ``data_iterator``.

    Each ``(data, label)`` batch is copied to the module-level ``ctx`` and fed
    through ``net``; the argmax of the output along axis 1 is taken as the
    prediction and accumulated in an ``mx.metric.Accuracy`` instance.

    Args:
        data_iterator: iterable yielding ``(data, label)`` pairs.
        net: callable applied to each data batch.

    Returns:
        Element 1 of the metric's ``get()`` result (the accuracy value).
    """
    metric = mx.metric.Accuracy()
    for batch, targets in data_iterator:
        batch = batch.as_in_context(ctx)
        targets = targets.as_in_context(ctx)
        scores = net(batch)
        metric.update(preds=nd.argmax(scores, axis=1), labels=targets)
    return metric.get()[1]

In [39]:
epochs = 10
# Weight given to the newest batch loss in the moving average below.
smoothing_constant = .01

for epoch in range(epochs):
    for batch_idx, (data, label) in enumerate(train_data):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)

        # Forward pass under autograd so gradients can be computed.
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()

        # The number of samples in this batch is passed to the optimizer step.
        batch_len = data.shape[0]
        trainer.step(batch_len)

        # Moving average of the batch loss, seeded with the loss of the
        # very first batch of the very first epoch.
        curr_loss = nd.mean(loss).asscalar()
        if epoch == 0 and batch_idx == 0:
            moving_loss = curr_loss
        else:
            moving_loss = (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss

    # After each epoch, report accuracy on both splits.
    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" % (epoch, moving_loss, train_accuracy, test_accuracy))

Epoch 0. Loss: 0.0898054249745, Train_acc 0.976566666667, Test_acc 0.9766
Epoch 1. Loss: 0.0538793658864, Train_acc 0.988333333333, Test_acc 0.9877
Epoch 2. Loss: 0.037650245783, Train_acc 0.990983333333, Test_acc 0.9887
Epoch 3. Loss: 0.0316568343098, Train_acc 0.993366666667, Test_acc 0.9882
Epoch 4. Loss: 0.0275906233683, Train_acc 0.99525, Test_acc 0.9896
Epoch 5. Loss: 0.0206449343342, Train_acc 0.995483333333, Test_acc 0.9913
Epoch 6. Loss: 0.0178021766784, Train_acc 0.996966666667, Test_acc 0.9922
Epoch 7. Loss: 0.0142040655628, Train_acc 0.998, Test_acc 0.9921
Epoch 8. Loss: 0.0105888281215, Train_acc 0.993933333333, Test_acc 0.9883
Epoch 9. Loss: 0.0099151757762, Train_acc 0.998966666667, Test_acc 0.992
