In [1]:
from __future__ import print_function
import mxnet as mx
from mxnet import nd, autograd
from mxnet import gluon
import numpy as np
# Fix the random seeds so that Restart & Run All repeats the same run.
mx.random.seed(1)
# MXNet's seed does not touch NumPy's global generator. Presumably the
# shuffling done by the NDArrayIter objects below draws from it (TODO confirm),
# so it is seeded as well.
np.random.seed(1)

In [2]:
# Device context: the parameters are initialised on it and every batch is
# copied to it (via as_in_context) before being fed to the network.
ctx = mx.cpu()


In [3]:
import json


def _load_split(path):
    """Read one JSON dataset file and return it as ``(features, labels)``.

    The file must contain an object with a ``'data'`` and a ``'label'`` entry;
    each entry is converted to a NumPy array without further processing.
    """
    with open(path) as json_file:
        payload = json.load(json_file)
    return np.array(payload['data']), np.array(payload['label'])


# The parsed JSON stays local to the helper. The names train_data / test_data
# are bound to data iterators further down, and reusing them here for the raw
# dicts made re-running this cell silently break the training loop.
train_X, train_Y = _load_split("./data/train_data_50000.json")
test_X, test_Y = _load_split("./data/test_data_50000.json")

In [4]:
# Give every sample the shape (1, 8, 8) expected by the convolutional network
# (presumably channel x height x width -- TODO confirm the layout).
# -1 lets NumPy infer the number of samples instead of hard-coding 50000, so
# the cell keeps working when a differently sized dataset is loaded.
train_X = train_X.reshape(-1, 1, 8, 8)
test_X = test_X.reshape(-1, 1, 8, 8)


In [5]:
batch_size = 200
num_inputs = 64   # values per sample (8 * 8); not referenced by the visible cells
num_outputs = 2   # size of the network's final Dense layer

# Training batches are shuffled; evaluation batches are not, because accuracy
# does not depend on the order in which the samples are visited.
train_data = mx.io.NDArrayIter(train_X, train_Y,
                               batch_size, shuffle=True)
test_data = mx.io.NDArrayIter(test_X, test_Y,
                              batch_size, shuffle=False)

In [6]:
# Network: two (convolution -> max-pooling) stages, then a flatten step and
# two dense layers; the last one has num_outputs units.
net = gluon.nn.Sequential()
with net.name_scope():
    # The layers are instantiated inside the name scope, in the same order in
    # which they are appended to the network.
    _stack = [
        gluon.nn.Conv2D(channels=20, kernel_size=2, activation='relu'),
        gluon.nn.MaxPool2D(pool_size=2, strides=2),
        gluon.nn.Conv2D(channels=20, kernel_size=2, activation='relu'),
        gluon.nn.MaxPool2D(pool_size=2, strides=2),
        gluon.nn.Flatten(),
        gluon.nn.Dense(50, activation="relu"),
        gluon.nn.Dense(num_outputs),
    ]
    for _layer in _stack:
        net.add(_layer)

In [7]:
# Initialise all parameters of the network on ctx with the Xavier scheme.
net.collect_params().initialize(
    init=mx.init.Xavier(magnitude=2.24),
    ctx=ctx,
)

In [8]:
# Loss used by the training loop; it is called with the network output and the
# batch labels.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

In [9]:
# Plain SGD over every parameter of the network.
trainer = gluon.Trainer(
    params=net.collect_params(),
    optimizer='sgd',
    optimizer_params={'learning_rate': .1},
)

In [10]:

def evaluate_accuracy(data_iterator, net):
    """Return the accuracy of ``net`` over all batches of ``data_iterator``.

    The iterator is reset before use. For every batch the first data and label
    arrays are copied to the global ``ctx``, the network output is reduced to
    a class index with an argmax over axis 1, and the result is fed to an
    ``mx.metric.Accuracy`` accumulator.

    Returns the value part of the metric's ``(name, value)`` result.
    """
    metric = mx.metric.Accuracy()
    # Rewind first so an iterator that was already consumed yields its batches again.
    data_iterator.reset()
    for batch in data_iterator:
        features = batch.data[0].as_in_context(ctx)
        targets = batch.label[0].as_in_context(ctx)
        predicted = nd.argmax(net(features), axis=1)
        metric.update(preds=predicted, labels=targets)
    return metric.get()[1]

In [11]:
epochs = 40
smoothing_constant = .01
# Exponentially smoothed batch loss; stays None until the first batch has been
# processed, so re-running this cell always starts from a clean estimate.
moving_loss = None

for e in range(epochs):
    # Rewind the training iterator: it is exhausted by the previous epoch and
    # by the accuracy evaluation at the end of it.
    train_data.reset()
    for batch in train_data:
        data = batch.data[0].as_in_context(ctx)
        label = batch.label[0].as_in_context(ctx)
        # Record the forward pass so that backward() can compute gradients.
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # The batch size is passed so the update is scaled per sample.
        trainer.step(data.shape[0])

        curr_loss = nd.mean(loss).asscalar()
        if moving_loss is None:
            moving_loss = curr_loss
        else:
            moving_loss = ((1 - smoothing_constant) * moving_loss
                           + smoothing_constant * curr_loss)

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    # The second accuracy is measured on the test set, so it is labelled
    # Test_acc (the previous label, Val_acc, did not match the recorded output).
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" % (e, moving_loss, train_accuracy, test_accuracy))

Epoch 0. Loss: 0.692594754921, Train_acc 0.52782, Test_acc 0.50966
Epoch 1. Loss: 0.691368245732, Train_acc 0.53114, Test_acc 0.50542
Epoch 2. Loss: 0.69062504942, Train_acc 0.53294, Test_acc 0.50394
Epoch 3. Loss: 0.690105664434, Train_acc 0.53594, Test_acc 0.50374
Epoch 4. Loss: 0.689646836355, Train_acc 0.53846, Test_acc 0.50418
Epoch 5. Loss: 0.689192030216, Train_acc 0.54204, Test_acc 0.50494
Epoch 6. Loss: 0.688704371279, Train_acc 0.54454, Test_acc 0.50522
Epoch 7. Loss: 0.688128962913, Train_acc 0.54708, Test_acc 0.50648
Epoch 8. Loss: 0.687421714543, Train_acc 0.54966, Test_acc 0.50986
Epoch 9. Loss: 0.686651458131, Train_acc 0.55242, Test_acc 0.51234
Epoch 10. Loss: 0.685820709168, Train_acc 0.55432, Test_acc 0.51496
Epoch 11. Loss: 0.685011139031, Train_acc 0.555, Test_acc 0.518
Epoch 12. Loss: 0.684211759323, Train_acc 0.55802, Test_acc 0.52038
Epoch 13. Loss: 0.683462451476, Train_acc 0.5597, Test_acc 0.52158
Epoch 14. Loss: 0.682668921907, Train_acc 0.56136, Test_acc 0.52