In [26]:
from superai.nn.layer.fc import FullyConnected
from superai.nn.layer.activator import Activator
from superai.nn.model.nnet import Sequence
import numpy as np
import os
import struct
from superai.nn.optimizer.adam import Adam
from superai.nn.optimizer.adagrad import AdaGrad
from superai.nn.optimizer.momentum import Momentum
from superai.nn.optimizer.rmsprop import RMSProp

In [27]:
def convert_to_one_hot(y, C):
    """Encode integer class indices as a column-wise one-hot matrix.

    Args:
        y: NumPy integer array of class indices; it is flattened before use.
        C: number of classes (size of each one-hot vector).

    Returns:
        Float array of shape (C, y.size) whose k-th column is the one-hot
        vector for the k-th entry of the flattened `y`.
    """
    class_indices = y.reshape(-1)
    identity = np.eye(C)
    # Picking rows of the identity gives one one-hot vector per sample;
    # the transpose puts samples on the column axis.
    one_hot_rows = identity[class_indices]
    return one_hot_rows.T

def calculate_accuracy(model, X, Y):
    """Print and return the classification accuracy of `model` on (X, Y).

    Samples are laid out column-wise: X has one column per sample and Y is
    the matching one-hot label matrix (one row per class, one column per
    sample).

    The predicted class of a sample is the row holding the largest score in
    `model.predict(X)`. `argmax` is used so every sample gets exactly one
    predicted class; comparing the scores against their column maximum would
    flag every tied class at once, giving fractional error counts and
    accuracies outside [0, 1] whenever scores tie.

    Args:
        model: object exposing `predict(X)` that returns a score matrix with
            the same column layout as `Y`.
        X: input matrix with one column per sample.
        Y: one-hot label matrix aligned with the columns of `X`.

    Returns:
        Fraction of samples whose predicted class equals the labelled class.
        The same value is printed.
    """
    sample_count = X.shape[1]
    scores = model.predict(X)
    predicted_class = np.argmax(scores, axis=0)
    true_class = np.argmax(Y, axis=0)
    wrong = np.sum(predicted_class != true_class)
    accuracy = 1 - wrong / sample_count
    print(accuracy)
    return accuracy

def load_mnist(path, kind='train'):
    """Load an MNIST-style IDX image/label pair from `path`.

    Reads `<kind>-labels-idx1-ubyte` and `<kind>-images-idx3-ubyte` from the
    given directory. Both files start with a big-endian header (magic number
    followed by the dimension sizes) and then hold raw unsigned bytes.

    Args:
        path: directory containing the two IDX files.
        kind: file-name prefix selecting the split, e.g. 'train' or 't10k'.

    Returns:
        (images, labels) where `images` is a uint8 array of shape
        (num_images, rows * cols), one flattened image per row, and `labels`
        is a uint8 vector with one class index per image.

    Raises:
        ValueError: if a file does not carry the expected IDX magic number,
            or the image file's count disagrees with the number of labels.
    """
    # Magic numbers of the IDX format: 0x00000801 (uint8, 1-D) for labels,
    # 0x00000803 (uint8, 3-D) for images.
    label_magic = 2049
    image_magic = 2051

    labels_path = os.path.join(path, '%s-labels-idx1-ubyte' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte' % kind)

    with open(labels_path, 'rb') as lbpath:
        magic, _declared_labels = struct.unpack('>II', lbpath.read(8))
        if magic != label_magic:
            raise ValueError('%s is not an IDX label file (magic %d)'
                             % (labels_path, magic))
        # Everything after the 8-byte header is one byte per label.
        labels = np.fromfile(lbpath, dtype=np.uint8)

    with open(images_path, 'rb') as imgpath:
        magic, num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
        if magic != image_magic:
            raise ValueError('%s is not an IDX image file (magic %d)'
                             % (images_path, magic))
        if num != len(labels):
            raise ValueError('image count %d does not match label count %d'
                             % (num, len(labels)))
        # Take the image size from the header instead of assuming 28 * 28.
        images = np.fromfile(imgpath,
                             dtype=np.uint8).reshape(num, rows * cols)
    return images, labels


# ---- Load and prepare the training set ------------------------------------
X_train, Y_train = load_mnist(path="MNIST")
X_train = X_train.transpose()              # one column per sample
Y_train = convert_to_one_hot(Y_train, 10)  # one-hot labels, one column per sample
m, n = X_train.shape                       # m: features per sample, n: sample count

# Min-max normalise every input feature (row) using training-set statistics.
# The small epsilon keeps the division defined for features that are constant
# across the training set.
nn_input_min = np.min(X_train, axis=1, keepdims=True)
nn_input_max = np.max(X_train, axis=1, keepdims=True)
nn_input_range = nn_input_max - nn_input_min + 1e-11
nn_input_final = (X_train - nn_input_min) / nn_input_range
X_train = nn_input_final

# ---- Build the network: m -> 40 -> 20 -> 10 with tanh activations ----------
# NOTE(review): `.layer` looks like the 1-based position of each layer in the
# stack; confirm against the superai implementation.
linearLayer1 = FullyConnected(m, 40)
linearLayer1.layer = 1
activator1 = Activator('tanh')
activator1.layer = 2
linearLayer2 = FullyConnected(40, 20)
linearLayer2.layer = 3
activator2 = Activator('tanh')
activator2.layer = 4
linearLayer3 = FullyConnected(20, 10)
linearLayer3.layer = 5
# outputLayer = NNActivator(linearLayer3, 'softmax', 10)

model = Sequence([linearLayer1, activator1, linearLayer2, activator2, linearLayer3], learning_rate=0.01, iteration_count=100, use_mini_batch=False, mini_batch_size=32)

# ---- Train ------------------------------------------------------------------
# Alternative (disabled): drive training through one of the imported
# optimizers instead of calling model.fit directly.
#optimizer = Adam()
#optimizer = RMSProp()
#optimizer = Momentum()
#optimizer = AdaGrad()
#optimizer.run(model, X_train, Y_train)
model.fit(X_train, Y_train)

# ---- Evaluate ---------------------------------------------------------------
X_test, Y_test = load_mnist(path="MNIST", kind="t10k")
X_test = X_test.transpose()
# The model was trained on normalised inputs, so the test inputs must go
# through the same transform with the *training* min/range. The cast to float
# avoids uint8 wrap-around when a test value is below the training minimum;
# the clip bounds features that were constant during training, whose
# epsilon-sized range would otherwise produce enormous values.
X_test = np.clip((X_test.astype(np.float64) - nn_input_min) / nn_input_range, 0.0, 1.0)
Y_test = convert_to_one_hot(Y_test, 10)

calculate_accuracy(model, X_train, Y_train)
calculate_accuracy(model, X_test, Y_test)

print("over")

iteration1  cost:116.2827676710392, accuracy:0.11519999999999997
iteration2  cost:112.47058715042519, accuracy:0.12031666666666663
iteration3  cost:108.79053847268425, accuracy:0.12758333333333338
iteration4  cost:105.23768394697568, accuracy:0.13460000000000005
iteration5  cost:101.80718375854578, accuracy:0.1433833333333333
iteration6  cost:98.49435553765815, accuracy:0.15346666666666664
iteration7  cost:95.29471354227569, accuracy:0.1629166666666667
iteration8  cost:92.20398638678604, accuracy:0.1729666666666667
iteration9  cost:89.21811804357151, accuracy:0.1842666666666667
iteration10  cost:86.33325838789607, accuracy:0.19518333333333338
iteration11  cost:83.54574863569007, accuracy:0.20648333333333335
iteration12  cost:80.85210531285549, accuracy:0.21756666666666669
iteration13  cost:78.2490048262989, accuracy:0.22914999999999996
iteration14  cost:75.73326959278681, accuracy:0.24076666666666668
iteration15  cost:73.3018560092736, accuracy:0.25221666666666664
iteration16  cost:70.