In [1]:
import numpy as np
import h5py
import time
import matplotlib.pyplot as plt
import sys
%matplotlib inline

# Fixed seed so that weight initialisation (np.random.normal) and minibatch
# shuffling (np.random.permutation) give the same results on every
# "Restart Kernel -> Run All". Change SEED to explore a different run.
SEED = 42
np.random.seed(SEED)

In [9]:
class MLP:
    """Small multi-layer perceptron trained with minibatch SGD.

    Data is read from HDF5 files that contain an ``xdata`` dataset (inputs)
    and a ``ydata`` dataset (one-hot labels; ``accuracy`` takes ``argmax``
    over axis 1). Inputs are divided by 255 when loaded.

    Typical use::

        mlp = MLP(train_file, test_file)
        mlp.setup(net_conf)
        mlp.train(net_conf)
    """

    def __init__(self, train_file, test_file=None):
        """Load the data sets.

        Args:
            train_file: Path of the HDF5 file holding the training data.
            test_file: Optional path of the HDF5 file holding the test data.
                When omitted, rows 0..49999 of ``train_file`` are used for
                training and rows 50000..59999 for validation (any rows past
                60000 are discarded); ``self.test`` stays empty.
                When given, the whole of ``train_file`` is used for training,
                ``self.validation`` stays empty, and the shapes of the loaded
                arrays are printed.
        """
        data = self.read_data(train_file)
        self.training = {}
        self.test = {}
        self.validation = {}
        if test_file is None:
            # Hard-coded 50000 / 10000 train / validation split.
            self.training['xdata'], self.validation['xdata'], _ = np.split(data['xdata'] / 255, [50000, 60000])
            self.training['ydata'], self.validation['ydata'], _ = np.split(data['ydata'], [50000, 60000])
        else:
            t_data = self.read_data(test_file)
            self.training['xdata'] = data['xdata'] / 255
            self.training['ydata'] = data['ydata']
            self.test['xdata'] = t_data['xdata'] / 255
            self.test['ydata'] = t_data['ydata']
            print(self.training['xdata'].shape)
            print(self.test['xdata'].shape)
            print(self.training['ydata'].shape)
            print(self.test['ydata'].shape)

    def read_data(self, filename):
        """Read every top-level dataset of an HDF5 file into memory.

        Args:
            filename: Path of the HDF5 file.

        Returns:
            dict mapping each top-level key of the file to its full contents.
        """
        data = {}
        with h5py.File(filename, 'r') as hf:
            for k in hf.keys():
                data[k] = hf[k][:]
        return data

    class layer:
        """One fully-connected layer: ``activation(x @ weight + bias)``."""

        # Activation names understood by forward() / backward().
        ACTIVATIONS = ('tanh', 'ReLU', 'softmax')

        def __init__(self, input_size, output_size, activate='ReLU', regularizer=None):
            """Create the layer with N(0, 0.1) initial weights and biases.

            Args:
                input_size: Number of input features.
                output_size: Number of output units.
                activate: One of ``'tanh'``, ``'ReLU'`` or ``'softmax'``.
                regularizer: ``None`` or a sequence ``(kind, strength)`` with
                    ``kind`` equal to ``'l1'`` or ``'l2'``.

            Raises:
                ValueError: If ``activate`` is not a supported activation.
            """
            if activate not in self.ACTIVATIONS:
                raise ValueError("Unknown activation %r; expected one of %r"
                                 % (activate, self.ACTIVATIONS))
            self.weight = np.random.normal(0, 0.1, (input_size, output_size))
            self.bias = np.random.normal(0, 0.1, (output_size, ))
            self.activate = activate
            self.regularizer = regularizer

        def set_par(self, w, b):
            """Overwrite the parameters (used for testing and by ``MLP.load``).

            The arrays are copied so that later in-place updates of the layer
            never write through to the caller's arrays.
            """
            self.weight = np.array(w)
            self.bias = np.array(b)

        def tanh(self, x):
            """Element-wise hyperbolic tangent."""
            return np.tanh(x)

        def ReLU(self, x):
            """Element-wise rectified linear unit."""
            return np.where(x > 0.0, x, 0.0)

        def softmax(self, x):
            """Row-wise softmax of a 2-D array.

            The row maximum is subtracted first so that ``exp`` cannot
            overflow; this does not change the result.
            """
            shifted = x - np.max(x, axis=1, keepdims=True)
            exps = np.exp(shifted)
            return exps / np.sum(exps, axis=1, keepdims=True)

        def forward(self, x):
            """Compute the layer output and cache what ``backward`` needs.

            Args:
                x: Input batch, one sample per row.

            Returns:
                The activated output, one row per sample.

            Raises:
                ValueError: If ``self.activate`` was changed to an unsupported
                    value after construction.
            """
            self.x = np.asarray(x)
            self.s = self.x @ self.weight + self.bias  # pre-activation
            if self.activate == 'tanh':
                return self.tanh(self.s)
            elif self.activate == 'ReLU':
                return self.ReLU(self.s)
            elif self.activate == 'softmax':
                return self.softmax(self.s)
            raise ValueError("Unknown activation %r" % (self.activate,))

        def backward(self, delta):
            """Back-propagate the error signal through this layer.

            Args:
                delta: Gradient of the loss with respect to this layer's
                    output. For a ``'softmax'`` layer it is used unchanged,
                    i.e. it must already be the gradient with respect to the
                    pre-activation (``MLP.backpropagation`` passes
                    ``y_hat - y``).

            Returns:
                Gradient of the loss with respect to this layer's input.

            The caller's ``delta`` array is never modified.
            """
            delta = np.asarray(delta)
            if self.activate == 'tanh':
                delta = delta * (1.0 - np.square(np.tanh(self.s)))
            elif self.activate == 'ReLU':
                # Strict '>' so the derivative agrees with ReLU() at s == 0.
                delta = delta * np.where(self.s > 0.0, 1.0, 0.0)
            # 'softmax': delta is already the pre-activation gradient.
            self.delta = delta
            return self.delta @ self.weight.T

        def update(self, eta):
            """Apply one gradient-descent step using the cached batch.

            Must be called after ``forward`` and ``backward``. The gradient is
            averaged over the batch. When a regularizer is configured its
            (sub)gradient is applied to both weights and biases first.

            Args:
                eta: Learning rate.

            Raises:
                AssertionError: If any parameter became NaN.
            """
            if self.regularizer is not None:
                kind = self.regularizer[0]
                if kind == 'l1':
                    strength = self.regularizer[1]
                    # Subgradient of strength * |w| is strength * sign(w).
                    self.weight -= (eta * strength * np.sign(self.weight))
                    self.bias -= (eta * strength * np.sign(self.bias))
                elif kind == 'l2':
                    strength = self.regularizer[1]
                    # Gradient of strength * w**2 is 2 * strength * w.
                    self.weight -= (eta * 2 * strength * self.weight)
                    self.bias -= (eta * 2 * strength * self.bias)
            batch = self.x.shape[0]
            update_w = (self.x.T @ self.delta) / batch
            self.weight -= (eta * update_w)
            update_b = np.sum(self.delta, axis=0) / batch
            self.bias -= (eta * update_b)
            assert not (np.isnan(self.weight).any() or np.isnan(self.bias).any()), "NaN error during weights update!!!"

    def network(self, net_conf):
        """Build ``self.net`` from ``net_conf['architecture']``.

        Each architecture entry maps a layer name to
        ``[input_size, output_size, activation]``. Layers are applied in the
        dict's iteration order. Every layer receives the regularizer found in
        ``net_conf['training']['regularizer']`` (``None`` when absent).

        Raises:
            AssertionError: If fewer than two layers are configured.
        """
        architecture = net_conf['architecture']
        assert len(architecture.keys()) >= 2, "Network Architecture Error!!"
        regularizer = net_conf.get('training', {}).get('regularizer')
        self.net = {}
        for name, spec in architecture.items():
            self.net[name] = self.layer(spec[0], spec[1], spec[2], regularizer)
        self.net_conf = net_conf

    def feedforward(self, x):
        """Run ``x`` through every layer in order and return the final output."""
        for n in self.net:
            x = self.net[n].forward(x)
        return x

    def backpropagation(self, y_hat, y, eta):
        """Back-propagate the error of one batch and update every layer.

        ``feedforward`` must have been called on the same batch immediately
        before, because the layers cache their inputs there.

        Args:
            y_hat: Network output for the batch.
            y: One-hot targets for the batch.
            eta: Learning rate.
        """
        # Gradient of cross-entropy w.r.t. the softmax pre-activation.
        delta = y_hat - y
        for n in reversed(list(self.net.keys())):
            delta = self.net[n].backward(delta)
        # Update only after all deltas were computed with the old weights.
        for n in self.net:
            self.net[n].update(eta)

    def cost(self, y_hat, y):
        """Cross-entropy summed over the batch.

        ``y_hat`` is floored at a tiny positive value so that an exact zero
        probability yields a large finite cost instead of ``inf`` / ``nan``.
        """
        eps = 1e-12
        return -np.sum(y * np.log(np.maximum(y_hat, eps)))

    def accuracy(self, x, y):
        """Fraction of rows of ``x`` whose predicted class matches ``y``."""
        y_hat = self.feedforward(x)
        pred = np.argmax(y_hat, axis=1)
        y_ = np.argmax(y, axis=1)
        return np.sum(pred == y_) / pred.shape[0]

    def sgd(self, eta=0.001, epoch=50, minibatch=500, regularizer=None, momentum=False, decay=False):
        """Train the network with minibatch stochastic gradient descent.

        After every minibatch the accuracy on the validation set (or on the
        test set when no validation set exists) is printed on one
        self-overwriting line. After every epoch the training accuracy and
        that evaluation accuracy are appended to ``self.train_acc`` and to
        ``self.valid_acc`` / ``self.test_acc``.

        Args:
            eta: Initial learning rate.
            epoch: Number of passes over the training set.
            minibatch: Samples per update. A trailing partial batch is used
                as well, so every sample is seen once per epoch.
            regularizer: Unused here; layers take their regularizer from the
                configuration passed to ``network``. Kept for compatibility.
            momentum: Unused; momentum is not implemented. Kept for
                compatibility.
            decay: When ``True``, halve ``eta`` every ``ceil(epoch / 3)``
                epochs; the epoch indices are stored in ``self.decay_pts``.

        Raises:
            ValueError: If ``minibatch`` is smaller than 1.
        """
        if minibatch < 1:
            raise ValueError("minibatch must be a positive integer")
        x_train = self.training['xdata']
        y_train = self.training['ydata']
        n_samples = x_train.shape[0]
        batch_starts = range(0, n_samples, minibatch)

        # Evaluate on the validation set when there is one, else on the test set.
        if self.validation:
            eval_set, eval_name, eval_history = self.validation, 'valid', 'valid_acc'
        else:
            eval_set, eval_name, eval_history = self.test, 'test', 'test_acc'

        print("<====start training====>")
        print("batch size is", minibatch)
        self.train_acc = []
        self.valid_acc = []
        self.test_acc = []
        self.decay_pts = []
        for i in range(epoch):
            # Fresh random order each epoch; slice the permutation so only the
            # current minibatch is copied.
            p = np.random.permutation(n_samples)
            for j, start in enumerate(batch_starts):
                idx = p[start:start + minibatch]
                y_hat = self.feedforward(x_train[idx])
                self.backpropagation(y_hat, y_train[idx], eta)
                acc = self.accuracy(eval_set['xdata'], eval_set['ydata'])
                string = "\r[epoch %d/%d] iteration#%d " % (i + 1, epoch, j + 1)
                print(string + "%s acc: %lf " % (eval_name, acc), end='')
            t_acc = self.accuracy(x_train, y_train)
            self.train_acc.append(t_acc)
            acc = self.accuracy(eval_set['xdata'], eval_set['ydata'])
            getattr(self, eval_history).append(acc)
            print("[epoch end] train acc: %lf, %s acc: %lf " % (t_acc, eval_name, acc), end='')
            if decay is True and (i + 1) % int(np.ceil(epoch / 3)) == 0:
                eta /= 2.0
                self.decay_pts.append(i)
                print("Learning rate decay by 2: %lf" % (eta), end='')
            print('')

    def setup(self, net_conf):
        """Build the network described by ``net_conf`` (see ``network``)."""
        self.network(net_conf)

    def train(self, net_conf):
        """Train with the optimizer named in ``net_conf['training']``.

        Raises:
            ValueError: If the configured optimizer is not ``'sgd'``.
        """
        training = net_conf['training']
        if training['optimizer'] == 'sgd':
            self.sgd(training['eta'], training['epoch'],
                     training['minibatch'], training['regularizer'],
                     training['momentum'], training['decay'])
        else:
            raise ValueError("Unknown optimizer %r; only 'sgd' is supported"
                             % (training['optimizer'],))

    def save(self, filename):
        """Write all layer parameters and the training history to an HDF5 file.

        Requires that ``sgd`` has been run, since it creates the history lists.
        """
        with h5py.File(filename, 'w') as hf:
            for n in self.net:
                hf.create_dataset(n + '_w', data=self.net[n].weight)
                hf.create_dataset(n + '_b', data=self.net[n].bias)
            hf.create_dataset('train_acc', data=self.train_acc)
            hf.create_dataset('valid_acc', data=self.valid_acc)
            hf.create_dataset('test_acc', data=self.test_acc)
            hf.create_dataset('decay_pts', data=self.decay_pts)

    def load(self, filename):
        """Restore parameters and history written by ``save``.

        The network must already have been built with the same layer names
        (see ``setup``).
        """
        with h5py.File(filename, 'r') as hf:
            for n in self.net:
                self.net[n].set_par(hf[n + '_w'][:], hf[n + '_b'][:])
            self.train_acc = hf['train_acc'][:]
            self.valid_acc = hf['valid_acc'][:]
            self.test_acc = hf['test_acc'][:]
            self.decay_pts = hf['decay_pts'][:]

        
# Load the training and test sets; with a test file given, no validation
# split is made and the array shapes are printed.
mlp = MLP(
    train_file='mnist_traindata.hdf5',
    test_file='mnist_testdata.hdf5',
)

(60000, 784)
(10000, 784)
(60000, 10)
(10000, 10)


In [14]:
# Network and training configuration consumed by mlp.setup() / mlp.train().
# Each architecture entry is [input_size, output_size, activation]; the input
# layer ('layer1') is implicit and therefore has no entry.
net_conf = dict(
    architecture={
        'layer2': [784, 48, 'ReLU'],
        'layer3': [48, 10, 'softmax'],
    },
    training=dict(
        eta=1.0,
        optimizer='sgd',
        epoch=50,
        minibatch=500,
        regularizer=['l2', 0.001],
        momentum=False,
        decay=True,
    ),
)

In [15]:
# Build the layers described by net_conf, then train with the optimizer it names.
mlp.setup(net_conf)
mlp.train(net_conf)

<====start training====>
batch size is 500
[epoch 1/50] iteration#120 test acc: 0.924000 [epoch end] train acc: 0.922167, test acc: 0.924000 
[epoch 2/50] iteration#120 test acc: 0.943800 [epoch end] train acc: 0.941667, test acc: 0.943800 
[epoch 3/50] iteration#120 test acc: 0.948600 [epoch end] train acc: 0.949783, test acc: 0.948600 
[epoch 4/50] iteration#120 test acc: 0.952000 [epoch end] train acc: 0.953050, test acc: 0.952000 
[epoch 5/50] iteration#120 test acc: 0.957900 [epoch end] train acc: 0.959550, test acc: 0.957900 
[epoch 6/50] iteration#120 test acc: 0.957400 [epoch end] train acc: 0.960983, test acc: 0.957400 
[epoch 7/50] iteration#120 test acc: 0.950800 [epoch end] train acc: 0.956083, test acc: 0.950800 
[epoch 8/50] iteration#120 test acc: 0.958400 [epoch end] train acc: 0.960200, test acc: 0.958400 
[epoch 9/50] iteration#120 test acc: 0.961100 [epoch end] train acc: 0.963417, test acc: 0.961100 
[epoch 10/50] iteration#120 test acc: 0.962000 [epoch end] train a