In [2]:
from superai.nn.layer.fc import FullyConnected
from superai.nn.layer.activator import Activator
from superai.nn.model.nnet import Sequence
import numpy as np
import os
import struct
from superai.nn.optimizer.optimizer import Adam

In [3]:
def convert_to_one_hot(y, C):
    """Build a one-hot matrix with one column per label.

    Args:
        y: NumPy integer array of class indices; it is flattened before use.
        C: Number of classes, which becomes the number of rows of the result.

    Returns:
        Array of shape (C, y.size) in which column j holds a single 1.0 at
        row y[j] and zeros elsewhere.
    """
    flat_labels = y.reshape(-1)
    # Row k of the identity matrix is the one-hot code of class k, so
    # indexing by label yields one code per sample, laid out as rows.
    identity = np.eye(C)
    codes_by_row = identity[flat_labels]
    # Callers expect samples along the columns.
    return codes_by_row.T

def calculate_accuracy(model, X, Y):
    """Print and return the fraction of samples classified correctly.

    Args:
        model: Object exposing ``predict(X)``. The prediction is read as a
            score matrix with classes along axis 0 and samples along axis 1.
        X: 2-D input array with one sample per column.
        Y: One-hot label matrix laid out like the prediction
            (classes along axis 0, samples along axis 1).

    Returns:
        The accuracy, i.e. correctly classified samples divided by the
        number of columns of ``X``. The same value is printed.
    """
    n = X.shape[1]
    scores = model.predict(X)
    # Compare class indices rather than a boolean "equals the column max"
    # mask: with tied scores such a mask flags several classes per sample,
    # which inflates the error count and can push the accuracy below zero.
    # argmax always selects exactly one class (the first one on ties).
    predicted_classes = np.argmax(scores, axis=0)
    expected_classes = np.argmax(Y, axis=0)
    accuracy = np.sum(predicted_classes == expected_classes) / n
    print(accuracy)
    return accuracy

def load_mnist(path, kind='train'):
    """Load an MNIST-style IDX image/label pair from `path`.

    Reads ``<kind>-labels-idx1-ubyte`` and ``<kind>-images-idx3-ubyte`` from
    the directory `path`.

    Args:
        path: Directory containing the two uncompressed IDX files.
        kind: File-name prefix selecting the split, e.g. 'train' or 't10k'.

    Returns:
        Tuple ``(images, labels)``: ``images`` is a uint8 array of shape
        (num_images, rows * cols) with one flattened image per row, and
        ``labels`` is a 1-D uint8 array with one entry per image.

    Raises:
        ValueError: If a file does not start with the expected IDX magic
            number, if the image and label counts differ, or if the image
            payload does not match the dimensions declared in its header.
    """
    labels_path = os.path.join(path, '%s-labels-idx1-ubyte' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte' % kind)

    with open(labels_path, 'rb') as lbpath:
        # Header: big-endian magic number and item count.
        magic, n = struct.unpack('>II', lbpath.read(8))
        if magic != 2049:  # 0x00000801: unsigned byte data, 1 dimension
            raise ValueError('%s is not an IDX label file (magic %d)'
                             % (labels_path, magic))
        labels = np.fromfile(lbpath, dtype=np.uint8)

    with open(images_path, 'rb') as imgpath:
        # Header: big-endian magic number, image count, rows, cols.
        magic, num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
        if magic != 2051:  # 0x00000803: unsigned byte data, 3 dimensions
            raise ValueError('%s is not an IDX image file (magic %d)'
                             % (images_path, magic))
        pixels = np.fromfile(imgpath, dtype=np.uint8)

    if len(labels) != num:
        raise ValueError('label count (%d) does not match image count (%d)'
                         % (len(labels), num))

    # Take the image size from the header instead of assuming 28 * 28 = 784,
    # so other IDX image sizes load correctly as well.
    images = pixels.reshape(num, rows * cols)
    return images, labels


# --- Load and preprocess the training split ---------------------------------
X_train, Y_train = load_mnist(path="MNIST")
X_train = X_train.transpose()  # one sample per column: (features, samples)
Y_train = convert_to_one_hot(Y_train, 10)
m, n = X_train.shape  # m = input features, n = training samples

# Per-feature min-max scaling of the network inputs. The statistics are taken
# from the training split only and reused for the test split further down.
nn_input_min = np.min(X_train, axis=1, keepdims=True)
nn_input_max = np.max(X_train, axis=1, keepdims=True)
# The small epsilon avoids 0/0 on features that are constant across the set.
nn_input_range = nn_input_max - nn_input_min + 1e-11
nn_input_final = (X_train - nn_input_min) / nn_input_range
X_train = nn_input_final

# --- Network: m -> 40 -> 20 -> 10 with tanh between the linear layers -------
linearLayer1 = FullyConnected(m, 40)
linearLayer1.layer = 1
activator1 = Activator('tanh')
linearLayer2 = FullyConnected(40, 20)
linearLayer2.layer = 2
activator2 = Activator('tanh')
linearLayer3 = FullyConnected(20, 10)
linearLayer3.layer = 3
# outputLayer = NNActivator(linearLayer3, 'softmax', 10)

model = Sequence([linearLayer1, activator1, linearLayer2, activator2, linearLayer3], learning_rate=0.02, iteration_count=100, lambd=0,
                 use_mini_batch=False, mini_batch_size=64)
# NOTE(review): the output recorded with this notebook shows the same cost and
# accuracy on every iteration, so training looks stalled -- verify the
# optimizer / learning-rate setup before trusting the accuracies below.
adamOpt = Adam()
adamOpt.run(model, X_train, Y_train)
# model.fit(X_train, Y_train)

# --- Load and preprocess the test split -------------------------------------
X_test, Y_test = load_mnist(path="MNIST", kind="t10k")
X_test = X_test.transpose()
Y_test = convert_to_one_hot(Y_test, 10)
# The model was trained on scaled inputs, so the test inputs must go through
# the same transform. Cast to float first: subtracting the uint8 training
# minimum from uint8 pixels would wrap around for values below that minimum.
# Clip to [0, 1] because a feature that is constant in training has a range of
# ~1e-11, and any differing test value there would otherwise become huge.
X_test = np.clip((X_test.astype(np.float64) - nn_input_min) / nn_input_range, 0.0, 1.0)

# --- Report accuracy on both splits -----------------------------------------
calculate_accuracy(model, X_train, Y_train)
calculate_accuracy(model, X_test, Y_test)

print("over")

iteration1  cost:2.3866864108175503, accuracy:0.11519999999999997
iteration2  cost:2.3866864108175503, accuracy:0.11519999999999997
iteration3  cost:2.3866864108175503, accuracy:0.11519999999999997
iteration4  cost:2.3866864108175503, accuracy:0.11519999999999997
iteration5  cost:2.3866864108175503, accuracy:0.11519999999999997
iteration6  cost:2.3866864108175503, accuracy:0.11519999999999997
iteration7  cost:2.3866864108175503, accuracy:0.11519999999999997
iteration8  cost:2.3866864108175503, accuracy:0.11519999999999997
iteration9  cost:2.3866864108175503, accuracy:0.11519999999999997
iteration10  cost:2.3866864108175503, accuracy:0.11519999999999997
iteration11  cost:2.3866864108175503, accuracy:0.11519999999999997
iteration12  cost:2.3866864108175503, accuracy:0.11519999999999997
iteration13  cost:2.3866864108175503, accuracy:0.11519999999999997
iteration14  cost:2.3866864108175503, accuracy:0.11519999999999997
iteration15  cost:2.3866864108175503, accuracy:0.11519999999999997
iter