<a href="https://colab.research.google.com/github/wisdomnet/too/blob/master/built_step_by_step.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
import os
# Mount Google Drive (Colab only) so files stored there are reachable from the notebook.
drive.mount('/content/drive')
# Move into the notebooks folder and then into the project directory.
# NOTE(review): both paths are hard-coded to one particular Drive layout.
%cd "/content/drive/MyDrive/Colab Notebooks/"
os.chdir('./NNToyFramework/python-neural-networks/')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Colab Notebooks


In [None]:
import sys
sys.path.append('..')

import numpy as np
from keras.datasets import mnist
from keras.utils import np_utils
import copy

In [None]:
class Layer:
    """Base class for every layer of the network.

    Subclasses implement ``forward`` / ``backward`` for a single sample and
    ``backward_b`` for one sample of a batch. The batch helpers defined here
    simply loop over the samples and delegate to those per-sample methods.

    Attributes:
        input_shape: shape of one input sample (may be filled in later).
        output_shape: shape of one output sample (may be filled in later).
        trainable: whether the layer owns parameters that ``update`` changes.
        input: input cached by the most recent forward pass.
        output: output cached by the most recent batched forward pass.
    """

    def __init__(self, input_shape=None, output_shape=None, trainable=True):
        self.input_shape = input_shape
        self.output_shape = output_shape
        self.trainable = trainable
        self.input = None
        self.output = None

    def on_input_shape(self):
        """Hook for subclasses that need to react to ``input_shape``; no-op here."""
        pass

    def initialize(self, initializer):
        """Create the layer parameters; the base layer has none and returns None."""
        pass

    def forward(self, input):
        """Compute the output for a single sample. Must be overridden."""
        raise NotImplementedError

    def backward(self, output_gradient):
        """Back-propagate a single sample. Must be overridden.

        Subclasses return ``(input_gradient, parameter_gradients)``.
        """
        raise NotImplementedError

    def forward_batch(self, input, batch_size):
        """Run ``forward`` on up to ``batch_size`` samples of ``input``.

        Slicing (rather than indexing ``range(batch_size)``) lets a final batch
        that holds fewer than ``batch_size`` samples pass through instead of
        raising IndexError.

        Returns:
            The stacked per-sample outputs as a NumPy array; also cached in
            ``self.output`` (and the batch in ``self.input``) for the backward pass.
        """
        batch_output = [self.forward(sample) for sample in input[:batch_size]]
        # Per-sample forward() calls may have overwritten self.input with a
        # single sample, so the whole batch is stored only after the loop.
        self.input = input
        self.output = np.array(batch_output)
        return self.output

    def backward_b(self, output_gradient, input_layer, output_layer):
        """Back-propagate one sample of a batch. Must be overridden.

        Unlike ``backward`` it receives the sample's cached input and output
        explicitly instead of reading them from ``self``.
        """
        raise NotImplementedError

    def backward_batch(self, output_gradient, batch_size):
        """Run ``backward_b`` on every sample cached by ``forward_batch``.

        At most ``batch_size`` samples are processed; a shorter batch is
        handled by stopping at the last available sample.

        Returns:
            ``(input_gradients, parameter_gradients)``, two lists with one
            entry per processed sample.
        """
        batch_first = []
        batch_second = []
        samples = zip(output_gradient[:batch_size], self.input, self.output)
        for gradient, input_layer, output_layer in samples:
            first, second = self.backward_b(output_gradient=gradient,
                                            input_layer=input_layer,
                                            output_layer=output_layer)
            batch_first.append(first)
            batch_second.append(second)
        return batch_first, batch_second

    def update(self, updates):
        """Apply parameter updates; trainable subclasses must override this."""
        if self.trainable:
            raise NotImplementedError

In [None]:
#Reshape((1, 784), input_shape=(28, 28))
class Reshape(Layer):
    """Parameter-free layer that only changes the shape of the data.

    The forward pass reshapes a sample to ``output_shape``; the backward pass
    reshapes the incoming gradient back to ``input_shape``.
    """

    def __init__(self, output_shape, **kwargs):
        super().__init__(output_shape=output_shape, trainable=False, **kwargs)

    def forward(self, input):
        """Return ``input`` reshaped to ``self.output_shape``."""
        target_shape = self.output_shape
        return np.reshape(input, target_shape)

    def backward(self, output_gradient):
        """Return ``(gradient reshaped to input_shape, None)``; there are no parameters."""
        restored = np.reshape(output_gradient, self.input_shape)
        return restored, None

    def backward_b(self, output_gradient, input_layer, output_layer):
        """Batch variant of ``backward``; the cached input/output are not needed."""
        return self.backward(output_gradient)


In [None]:
#Dense(50)
class Dense(Layer):
    """Fully connected layer computing ``input . weights + bias`` on row vectors."""

    def __init__(self, output_size, **kwargs):
        super().__init__(output_shape=(1, output_size), **kwargs)

    def initialize(self, initializer):
        """Draw weights and bias from ``initializer`` and return their shapes."""
        n_inputs = self.input_shape[1]
        n_outputs = self.output_shape[1]
        self.weights = initializer.get(n_inputs, n_outputs)
        self.bias = initializer.get(1, n_outputs)
        return [(n_inputs, n_outputs), (1, n_outputs)]

    def get_weights(self):
        """Return ``[weights, bias]`` (the live arrays, not copies)."""
        return [self.weights, self.bias]

    def set_weights(self, weights):
        """Replace the parameters with ``weights[0]`` (weights) and ``weights[1]`` (bias)."""
        self.weights = weights[0]
        self.bias = weights[1]

    def forward(self, input):
        """Cache ``input`` and return the affine transform of it."""
        self.input = input
        projected = np.dot(input, self.weights)
        return projected + self.bias

    def backward(self, output_gradient):
        """Back-propagate one sample using the input cached by ``forward``."""
        return self.backward_b(output_gradient, self.input, None)

    def backward_b(self, output_gradient, input_layer, output_layer):
        """Back-propagate one sample given its input explicitly.

        Returns:
            ``(input_gradient, [weights_gradient, bias_gradient])``.
        """
        input_gradient = np.dot(output_gradient, self.weights.T)
        weights_gradient = np.dot(input_layer.T, output_gradient)
        return input_gradient, [weights_gradient, output_gradient]

    def update(self, updates):
        """Add ``updates`` to the parameters in place and report NaN problems.

        A diagnostic line is printed for every array that sums to NaN, both
        before and after the update is applied. The update itself is applied
        regardless.

        Returns:
            True when no NaN was seen, False otherwise.
        """
        def sums_to_nan(values):
            return np.isnan(np.sum(values))

        check = True

        before_update = (
            ('backward update[0]:nan', updates[0]),
            ('backward update[1]:nan', updates[1]),
            ('backward self.weights:nan', self.weights),
            ('backward self.bias:nan', self.bias),
        )
        for message, values in before_update:
            if sums_to_nan(values):
                print(message)
                check = False

        self.weights += updates[0]
        self.bias += updates[1]

        after_update = (
            ('backward self.weights:nan', self.weights),
            ('backward self.bias:nan', self.bias),
        )
        for message, values in after_update:
            if sums_to_nan(values):
                print(message)
                check = False

        return check

In [None]:
class BatchNormalization(Layer):
    """Normalizes its input and applies a learnable scalar scale and shift.

    Mean and variance are taken over *every* element of the array given to
    ``forward`` (or of the whole batch given to ``forward_batch``), and
    ``gamma`` / ``beta`` are single scalars. The backward pass therefore has
    to reduce over every element as well; reducing over ``axis=0`` only would
    produce per-column values that do not match the scalar parameters.
    """

    def __init__(self, epsilon=0.001, **kwargs):
        super().__init__(**kwargs)
        # Added to the variance before the square root to avoid division by zero.
        self.epsilon = epsilon

    def initialize(self, initializer):
        """Draw the scalar scale and shift and return one shape entry for each."""
        self.gamma = initializer.get()
        self.beta = initializer.get()
        return [1, 1]

    def _normalize(self, input):
        """Cache the statistics of ``input`` and return the scaled, shifted result."""
        self.input = input
        self.mu = np.mean(input)
        self.sigma2 = np.var(input)
        self.x_hat = (input - self.mu) / np.sqrt(self.sigma2 + self.epsilon)
        return self.gamma * self.x_hat + self.beta

    def forward(self, input):
        """Normalize a single sample using its own mean and variance."""
        return self._normalize(input)

    def forward_batch(self, input, batch_size):
        """Normalize the whole batch at once using batch-wide statistics.

        ``batch_size`` is accepted for interface compatibility; the statistics
        always cover all of ``input``.
        """
        self.output = self._normalize(input)
        return self.output

    def backward(self, output_gradient):
        """Back-propagate through the most recent normalization.

        Returns:
            ``(input_gradient, [gamma_gradient, beta_gradient])`` where both
            parameter gradients are scalars.
        """
        N = self.input.size
        dx_hat = output_gradient * self.gamma
        tmp = N * np.sqrt(self.sigma2 + self.epsilon)
        # All reductions run over every element because mu / sigma2 did too.
        input_gradient = (
            N * dx_hat
            - np.sum(dx_hat)
            - self.x_hat * np.sum(dx_hat * self.x_hat)
        ) / tmp
        return input_gradient, [
            np.sum(output_gradient * self.x_hat),
            np.sum(output_gradient)
        ]

    def backward_batch(self, output_gradient, batch_size):
        """Back-propagate a batch that was normalized by ``forward_batch``.

        The input gradient comes from one batch-wide ``backward`` call, since
        the samples are coupled through the shared statistics. The parameter
        gradients are split into per-sample contributions whose sum equals the
        batch gradient, matching the per-sample list format that the generic
        ``Layer.backward_batch`` produces.
        """
        gradient = np.asarray(output_gradient)
        input_gradient, _ = self.backward(gradient)
        per_sample = [
            [np.sum(sample_gradient * sample_x_hat), np.sum(sample_gradient)]
            for sample_gradient, sample_x_hat in zip(gradient, self.x_hat)
        ]
        return list(input_gradient), per_sample

    def update(self, updates):
        """Add ``updates[0]`` to gamma and ``updates[1]`` to beta."""
        self.gamma += updates[0]
        self.beta += updates[1]

In [None]:
class Activation(Layer):
    """Parameter-free layer applying an element-wise function.

    Args:
        activation: callable applied in the forward pass.
        activation_prime: callable giving the derivative of ``activation``.
    """

    def __init__(self, activation, activation_prime, **kwargs):
        super().__init__(trainable=False, **kwargs)
        self.activation = activation
        self.activation_prime = activation_prime

    def forward(self, input):
        """Cache ``input`` and return ``activation(input)``."""
        self.input = input
        return self.activation(input)

    def backward(self, output_gradient):
        """Scale the gradient by the derivative at the cached input."""
        slope = self.activation_prime(self.input)
        return output_gradient * slope, None

    def backward_b(self, output_gradient, input_layer, output_layer):
        """Scale the gradient by the derivative at ``input_layer``."""
        slope = self.activation_prime(input_layer)
        return output_gradient * slope, None

In [None]:
class Softmax(Layer):
    """Parameter-free softmax layer operating on a single row vector."""

    def __init__(self, **kwargs):
        super().__init__(trainable=False, **kwargs)

    def forward(self, input):
        """Return the softmax of ``input`` and cache both input and output.

        The maximum is subtracted before exponentiating. This leaves the
        result mathematically unchanged but keeps ``np.exp`` from overflowing
        to ``inf`` (and the division from producing NaN) for large inputs.
        """
        self.input = input
        exponentials = np.exp(input - np.max(input))
        self.output = exponentials / np.sum(exponentials)
        return self.output

    def backward(self, output_error):
        """Back-propagate one sample using the output cached by ``forward``."""
        return self.backward_b(output_error, self.input, self.output)

    def backward_b(self, output_gradient, input_layer, output_layer):
        """Back-propagate one sample given its softmax output explicitly.

        With softmax output ``s`` and incoming gradient ``g`` the input
        gradient is ``s * (g - sum(g * s))``. This is the product of ``g``
        with the softmax Jacobian, written without materializing the n-by-n
        matrix.

        Returns:
            ``(input_gradient, None)``; the layer has no parameters.
        """
        weighted_total = np.sum(output_gradient * output_layer)
        return output_layer * (output_gradient - weighted_total), None
    

In [None]:
def tanh(x):
    """Return the element-wise hyperbolic tangent of ``x``."""
    activated = np.tanh(x)
    return activated

def tanh_prime(x):
    """Return the derivative of tanh at ``x``, i.e. ``1 - tanh(x)**2``."""
    activated = np.tanh(x)
    squared = np.power(activated, 2)
    return 1 - squared

class Tanh(Activation):
    """Activation layer wired to ``tanh`` and its derivative ``tanh_prime``."""

    def __init__(self, **kwargs):
        super().__init__(activation=tanh, activation_prime=tanh_prime, **kwargs)

In [None]:
class Loss:
    """Interface that every loss function implements."""

    def call(self, y_true, y_pred):
        """Return the loss value for one sample. Must be overridden."""
        raise NotImplementedError

    def prime(self, y_true, y_pred):
        """Return the derivative of the loss w.r.t. ``y_pred``. Must be overridden."""
        raise NotImplementedError

class MSE(Loss):
    """Mean squared error loss.

    Note on argument names: ``call`` / ``call_batch`` name their first
    parameter ``y_pred``, while ``Loss.call`` names it ``y_true``. The squared
    difference is symmetric, so the value is the same whichever order the
    caller uses; the names are kept as they are for compatibility.
    """

    def call(self, y_pred, y_true):
        """Return the mean of the squared element-wise differences."""
        return np.mean(np.power(y_true - y_pred, 2))

    def call_batch(self, y_pred, y_true, batch_size):
        """Return the mean of the per-sample losses over a batch.

        At most ``batch_size`` samples are used; a batch holding fewer samples
        is handled instead of raising IndexError.
        """
        batch_result = [
            self.call(prediction, target)
            for prediction, target in zip(y_pred[:batch_size], y_true[:batch_size])
        ]
        return np.mean(np.array(batch_result))

    def prime(self, y_true, y_pred):
        """Return the derivative of the loss with respect to ``y_pred``.

        Prints a diagnostic line when the result contains NaN.
        """
        gradient = 2 * (y_pred - y_true) / y_pred.size
        if np.isnan(np.sum(gradient)):
            print('MSE.prime: gradient contains NaN')
        return gradient

    def prime_batch(self, y_true, y_pred, batch_size):
        """Return the per-sample derivatives of a batch stacked into one array.

        At most ``batch_size`` samples are used; a shorter batch is handled
        instead of raising IndexError.
        """
        batch_result = [
            self.prime(target, prediction)
            for target, prediction in zip(y_true[:batch_size], y_pred[:batch_size])
        ]
        return np.array(batch_result)


In [None]:
class OptimizerBase:
    """Accumulates gradients for one parameter and turns them into an update.

    Subclasses implement ``update``. The mandatory ``shape`` keyword is stored
    on the instance.
    """

    def __init__(self, **kwargs):
        self.gradients = []
        self.shape = kwargs['shape']

    def set_gradients(self, gradients):
        """Queue one more gradient for the next ``get_gradients`` call."""
        self.gradients.append(gradients)

    def get_gradients(self, iteration):
        """Sum the queued gradients, pass them through ``update`` and reset the queue."""
        accumulated = np.sum(self.gradients, axis=0)
        updated_gradients = self.update(iteration, accumulated)
        self.gradients = []
        return updated_gradients

    def update(self, iteration, gradients):
        """Map summed gradients to a parameter update. Must be overridden."""
        raise NotImplementedError

class Optimizer:
    """Bundles one per-parameter optimizer for every parameter of a layer.

    Args:
        OptimizerBaseClass: class instantiated once per parameter.
        optimizerArgs: keyword arguments forwarded to every instance.
        param_shapes: iterable of parameter shapes; each one is passed to its
            instance as the ``shape`` keyword.
    """

    def __init__(self, OptimizerBaseClass, optimizerArgs, param_shapes):
        self.optimizers = [
            OptimizerBaseClass(**{**optimizerArgs, 'shape': shape})
            for shape in param_shapes
        ]

    def set_gradients(self, gradients):
        """Hand each parameter's gradient of one sample to its optimizer."""
        for optimizer, grad in zip(self.optimizers, gradients):
            optimizer.set_gradients(grad)

    def set_gradients_batch(self, gradients, batch_size):
        """Queue the gradients of up to ``batch_size`` samples.

        ``gradients`` holds one per-parameter gradient list per sample. A
        batch with fewer than ``batch_size`` entries is handled instead of
        raising IndexError.
        """
        for sample_gradients in gradients[:batch_size]:
            self.set_gradients(sample_gradients)

    def get_gradients(self, iteration):
        """Return the update of every parameter, in parameter order."""
        return [opt.get_gradients(iteration) for opt in self.optimizers]

class SGD(OptimizerBase):
    """Plain gradient descent: the update is the negated, scaled gradient."""

    def __init__(self, learning_rate=0.01, **kwargs):
        super().__init__(**kwargs)
        self.learning_rate = learning_rate

    def update(self, iteration, weights):
        """Return ``-learning_rate * weights``.

        ``weights`` receives the summed gradients from ``get_gradients``;
        ``iteration`` is not used.
        """
        step_scale = -self.learning_rate
        return step_scale * weights


In [None]:
class Initializer:
    """Base class for parameter initializers.

    ``create_model`` tells the initializer the (input_shape, output_shape)
    pair of every layer and which layer is currently being initialized, so
    that subclasses can scale their values by the layer size.
    """

    def __init__(self):
        self.layer_shapes = None
        self.index = None

    def set_layer_shapes(self, layer_shapes):
        """Store the list of (input_shape, output_shape) pairs, one per layer."""
        self.layer_shapes = layer_shapes

    def set_layer_index(self, index):
        """Select the layer that subsequent ``get`` calls refer to."""
        self.index = index

    def get_io_shape(self):
        """Return the (input_shape, output_shape) pair of the selected layer."""
        return self.layer_shapes[self.index]

    def get(self, *shape):
        """Return initial values of the given shape. Must be overridden.

        Calling ``get()`` with no arguments requests an empty shape ``()``.
        A second, zero-argument ``get`` used to be defined before this one;
        Python keeps only the last definition of a name in a class body, so
        it was never reachable and has been removed.
        """
        raise NotImplementedError

class Xavier(Initializer):
    """Standard-normal values scaled by ``sqrt(1 / number of layer inputs)``."""

    def get(self, *shape):
        """Return an array of ``shape`` scaled by the current layer's input size."""
        layer_input_shape = self.get_io_shape()[0]
        fan_in = np.prod(layer_input_shape)
        scale = np.sqrt(1 / fan_in)
        return np.random.randn(*shape) * scale
class Normal(Initializer):
    """Values drawn from a normal distribution with the given mean and std."""

    def __init__(self, mean=0, std=1):
        super().__init__()
        self.mean, self.std = mean, std

    def get(self, *shape):
        """Return an array of ``shape`` sampled from N(mean, std)."""
        return np.random.normal(loc=self.mean, scale=self.std, size=shape)
class Constant(Initializer):
    """Every value is set to the same ``fill_value``."""

    def __init__(self, fill_value=1.0):
        super().__init__()
        self.fill_value = fill_value

    def get(self, *shape):
        """Return an array of ``shape`` filled with ``fill_value``."""
        value = self.fill_value
        return np.full(shape, value)

In [None]:
def create_model(network, initializer, OptimizerBaseClass, optimizerArgs=None):
    """Wire up a list of layers and return the trainable model.

    Steps:
      1. Propagate shapes: a layer without ``input_shape`` inherits the
         previous layer's ``output_shape``; a layer without ``output_shape``
         keeps its input shape.
      2. Initialize every layer with ``initializer``.
      3. Create one ``Optimizer`` per trainable layer.

    The layer shapes are printed before and after propagation.

    Args:
        network: list of layer objects, in forward order.
        initializer: object providing ``set_layer_shapes``, ``set_layer_index``
            and whatever the layers call from ``initialize``.
        OptimizerBaseClass: per-parameter optimizer class.
        optimizerArgs: keyword arguments for ``OptimizerBaseClass``; defaults
            to no arguments.

    Returns:
        A list of ``(layer, optimizer, weights)`` tuples. ``optimizer`` and
        ``weights`` are None for non-trainable layers; ``weights`` is also
        None for trainable layers that do not expose ``get_weights``.

    Raises:
        ValueError: if the first layer does not declare ``input_shape``
            (there is no previous layer to inherit it from).
    """
    # A fresh dict per call instead of a shared mutable default argument.
    if optimizerArgs is None:
        optimizerArgs = {}

    print(len(network))
    for i, layer in enumerate(network):
        print(i, layer.input_shape, layer.output_shape)
    print('----')

    # set input_shape & output_shape
    for i, layer in enumerate(network):
        if not layer.input_shape:
            if i == 0:
                # network[i - 1] would silently wrap around to the last layer.
                raise ValueError('the first layer must declare input_shape')
            layer.input_shape = network[i - 1].output_shape
        layer.on_input_shape()
        if not layer.output_shape:
            layer.output_shape = layer.input_shape

    for i, layer in enumerate(network):
        print(i, layer.input_shape, layer.output_shape)

    # initialize layers & create one optimizer per trainable layer
    layer_shapes = [(layer.input_shape, layer.output_shape) for layer in network]
    initializer.set_layer_shapes(layer_shapes)
    optimizers = []
    weights = []
    for i, layer in enumerate(network):
        initializer.set_layer_index(i)
        param_shapes = layer.initialize(initializer)
        if layer.trainable:
            optimizers.append(Optimizer(OptimizerBaseClass, optimizerArgs, param_shapes))
            get_weights = getattr(layer, 'get_weights', None)
            weights.append(get_weights() if callable(get_weights) else None)
        else:
            optimizers.append(None)
            weights.append(None)

    # list of (layer, optimizer, weights)
    return list(zip(network, optimizers, weights))

def summary(model):
    """Print the input and output shape of every layer of ``model``.

    ``model`` is the list of ``(layer, optimizer, weights)`` tuples built by
    ``create_model``; only the layer is needed, so the remaining tuple items
    are ignored whatever their number.
    """
    for layer, *_ in model:
        print(layer.input_shape, '\t', layer.output_shape)

def forward(model, input):
    """Feed one sample through every layer of ``model`` and return the result."""
    activation = input
    for layer, _optimizer, _weights in model:
        activation = layer.forward(activation)
    return activation


def forward_batch(model, input, batch_size=1):
    """Feed a batch through every layer of ``model`` and return the result.

    ``batch_size`` is forwarded unchanged to each layer's ``forward_batch``.
    """
    activations = input
    for layer, _optimizer, _weights in model:
        activations = layer.forward_batch(activations, batch_size)
    return activations

def backward(model, output):
    """Back-propagate one sample through ``model`` from the last layer to the first.

    Every layer turns the incoming error into the error for the layer before
    it; the parameter gradients of trainable layers are queued on their
    optimizer. (An always-true ``layer_count <= 10`` guard and stale debugging
    comments were removed; behavior is unchanged.)

    Args:
        model: list of ``(layer, optimizer, weights)`` tuples.
        output: derivative of the loss with respect to the model output.

    Returns:
        The error propagated past the first layer.
    """
    error = output
    for layer, optimizer, _weights in reversed(model):
        error, gradients = layer.backward(error)
        if layer.trainable:
            optimizer.set_gradients(gradients)
    return error

def backward_batch(model, output, batch_size):
    """Back-propagate a batch through ``model`` from the last layer to the first.

    Every layer turns the incoming per-sample errors into the errors for the
    layer before it; the per-sample parameter gradients of trainable layers
    are queued on their optimizer. (An always-true ``layer_count <= 10`` guard
    and stale debugging comments were removed; behavior is unchanged.)

    Args:
        model: list of ``(layer, optimizer, weights)`` tuples.
        output: per-sample derivatives of the loss w.r.t. the model output.
        batch_size: forwarded to every layer and optimizer.

    Returns:
        The per-sample errors propagated past the first layer.
    """
    error = output
    for layer, optimizer, _weights in reversed(model):
        error, gradients = layer.backward_batch(error, batch_size)
        if layer.trainable:
            optimizer.set_gradients_batch(gradients, batch_size)
    return error

def update(model, iteration):
    """Apply the queued gradients to every trainable layer of ``model``.

    Every trainable layer is always updated, even after an earlier one
    reported a problem. The result is False as soon as *any* layer's
    ``update`` returns a false value; previously the result of the last layer
    overwrote the earlier ones and could hide a failure. A layer whose
    ``update`` returns None is treated as successful.

    Args:
        model: list of ``(layer, optimizer, weights)`` tuples.
        iteration: forwarded to ``optimizer.get_gradients``.

    Returns:
        True when no layer reported a failure, False otherwise.
    """
    check = True
    for layer, optimizer, _weights in model:
        if not layer.trainable:
            continue
        result = layer.update(optimizer.get_gradients(iteration))
        if result is not None and not result:
            check = False
    return check

#def get_weights(model):
#    return model[:,2]
#def set_weights(model, weights) : 
#    model[:,2]=weights

def train_batch(model, loss, x_train, y_train, epochs, batch_size):
    """Train ``model`` with mini-batches, printing NaN diagnostics for every batch.

    For every batch the forward pass, the backward pass and the parameter
    update are run and each is checked for NaN. Training stops (the function
    returns) after the first batch in which any of the three checks failed;
    the remaining steps of that batch are still executed so that all three
    diagnostics are printed.

    The number of samples actually present in a batch is passed on to the
    batch helpers, so a training set whose size is not a multiple of
    ``batch_size`` is handled. At the end of each completed epoch the mean
    per-sample loss is printed in the same format that ``train`` uses.

    Args:
        model: list of ``(layer, optimizer, weights)`` tuples.
        loss: object providing ``call_batch`` and ``prime_batch``.
        x_train, y_train: sliceable training inputs and targets.
        epochs: number of passes over the training set.
        batch_size: maximum number of samples per batch.
    """
    train_set_size = len(x_train)
    for epoch in range(1, epochs + 1):
        error = 0
        for i in range(0, train_set_size, batch_size):
            break_loop = False
            print(f"epoch: {epoch} batch:{i}")
            x = x_train[i:i + batch_size]
            y = y_train[i:i + batch_size]
            # The last batch may hold fewer than batch_size samples.
            current_size = len(x)

            output = forward_batch(model, x, current_size)
            # call_batch returns a per-batch mean; weight it by the batch
            # length so the epoch total can be divided by the sample count.
            error += loss.call_batch(y, output, current_size) * current_size

            if np.isnan(np.sum(output)):
                break_loop = True
                print("output fail")
            else:
                print("output ok")

            check_backward = backward_batch(
                model, loss.prime_batch(y, output, current_size), current_size)
            if np.isnan(np.sum(check_backward)):
                break_loop = True
                print("backward fail")
            else:
                print("backward ok")

            check_update = update(model, epoch)
            # Only an explicit False counts as failure; None means "no report".
            if check_update == False:
                print("update fail")
                break_loop = True
            else:
                print("update ok")

            if break_loop:
                return

        if train_set_size:
            error /= train_set_size
        print('%d/%d, error=%f' % (epoch, epochs, error))

def train(model, loss, x_train, y_train, epochs, batch=1):
    """Train ``model`` sample by sample, updating every ``batch`` samples.

    Gradients are queued on the optimizers after each sample and applied once
    ``batch`` samples have been processed; gradients left over at the end of
    an epoch are applied before the next epoch starts. With the default
    ``batch=1`` the parameters are updated after every sample.

    The update used to be gated on ``epoch % batch``, which compared the batch
    size with the epoch number rather than with the number of processed
    samples.

    Args:
        model: list of ``(layer, optimizer, weights)`` tuples.
        loss: object providing ``call`` and ``prime``.
        x_train, y_train: training inputs and targets.
        epochs: number of passes over the training set.
        batch: number of samples whose gradients are summed per update.

    Raises:
        ValueError: if ``batch`` is smaller than 1.
    """
    if batch < 1:
        raise ValueError('batch must be at least 1')

    train_set_size = len(x_train)
    for epoch in range(1, epochs + 1):
        error = 0
        pending = 0  # samples whose gradients are queued but not yet applied

        for x, y in zip(x_train, y_train):
            output = forward(model, x)
            error += loss.call(y, output)
            backward(model, loss.prime(y, output))

            pending += 1
            if pending >= batch:
                update(model, epoch)
                pending = 0

        # Flush a trailing partial batch so nothing leaks into the next epoch.
        if pending:
            update(model, epoch)

        # Guard against an empty training set (would divide by zero).
        if train_set_size:
            error /= train_set_size
        print('%d/%d, error=%f' % (epoch, epochs, error))

def test(model, loss, x_test, y_test):
    """Return the mean loss of ``model`` over the given test samples.

    Each sample is run through ``forward`` and scored with ``loss.call``; the
    summed loss is divided by ``len(x_test)``.
    """
    total_error = sum(
        loss.call(target, forward(model, sample))
        for sample, target in zip(x_test, y_test)
    )
    return total_error / len(x_test)

In [None]:
def load_data(n):
    """Load MNIST, scale the pixels by 1/255 and one-hot encode the labels.

    Args:
        n: number of training samples to keep (the test set is returned whole).

    Returns:
        ``(x_train[:n], y_train[:n], x_test, y_test)``.
    """
    def prepare(images, labels):
        # float32 pixels divided by 255, labels converted with to_categorical
        scaled = images.astype('float32')
        scaled /= 255
        return scaled, np_utils.to_categorical(labels)

    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()
    x_train, y_train = prepare(train_images, train_labels)
    x_test, y_test = prepare(test_images, test_labels)

    return x_train[:n], y_train[:n], x_test, y_test
# Network under test: flatten each 28x28 image to a (1, 784) row vector,
# apply one dense layer with 10 outputs and finish with softmax.
# The commented-out entries are an optional hidden stack.
model_architecture = [
    Reshape((1, 784), input_shape=(28, 28)),
    #Dense(50),
    #Tanh(),
    #Dense(20),
    #Tanh(),
    Dense(10),
    Softmax()
]

# Two models with identical initial parameters: one is trained sample by
# sample, the deep copy is trained with the batch code path.
model_single = create_model(model_architecture, Normal(), SGD, {'learning_rate': 0.5})
model_batch = copy.deepcopy(model_single)
mse = MSE()



3
0 (28, 28) (1, 784)
1 None (1, 10)
2 None None
----
0 (28, 28) (1, 784)
1 (1, 784) (1, 10)
2 (1, 10) (1, 10)


In [None]:
def train_debug(model_single, model_batch, loss, x_train, y_train, epochs, batch_size=1):
    """Train two models side by side to compare the per-sample and batch code paths.

    In every epoch ``model_single`` is trained sample by sample (update after
    each sample) and ``model_batch`` is trained on mini-batches (update after
    each batch). The mean per-sample loss of both is printed at the end of
    the epoch.

    ``loss.call_batch`` returns the mean loss of a batch, so it is multiplied
    by the number of samples in the batch before being accumulated. Both
    totals can then be divided by the training-set size and are directly
    comparable. The number of samples actually present in a batch is passed
    on to the batch helpers, so a final partial batch is handled.

    Args:
        model_single: model trained with ``forward`` / ``backward``.
        model_batch: model trained with ``forward_batch`` / ``backward_batch``.
        loss: object providing ``call``, ``prime``, ``call_batch``, ``prime_batch``.
        x_train, y_train: sliceable training inputs and targets.
        epochs: number of passes over the training set.
        batch_size: maximum number of samples per batch for ``model_batch``.
    """
    train_set_size = len(x_train)
    for epoch in range(1, epochs + 1):
        error_single = 0
        error_batch = 0

        # Per-sample path
        for x, y in zip(x_train, y_train):
            output = forward(model_single, x)
            error_single += loss.call(y, output)
            backward(model_single, loss.prime(y, output))
            update(model_single, epoch)

        # Mini-batch path
        for i in range(0, train_set_size, batch_size):
            x_batch = x_train[i:i + batch_size]
            y_batch = y_train[i:i + batch_size]
            current_size = len(x_batch)

            output_batch = forward_batch(model_batch, x_batch, current_size)
            batch_mean = loss.call_batch(y_batch, output_batch, current_size)
            error_batch += batch_mean * current_size

            batch_prime = loss.prime_batch(y_batch, output_batch, current_size)
            backward_batch(model_batch, batch_prime, current_size)
            update(model_batch, epoch)

        # Guard against an empty training set (would divide by zero).
        if train_set_size:
            error_single /= train_set_size
            error_batch /= train_set_size
        print(f'{epoch}/{epochs}, error_single={error_single} , error_batch={error_batch}' )

In [None]:
# Mini-batch length used for the batch-trained model.
batch_size=10
# Keep only the first 1000 training samples; the test set is returned whole.
x_train, y_train, x_test, y_test = load_data(1000)
# Earlier experiments, kept for reference. NOTE(review): `model`, `model_1` and
# `model_2` are not defined anywhere in the visible notebook.
#output=forward_batch(model, x_train[0:batch_size], batch_size)
#output.shape
#train_batch(model, mse, x_train, y_train, epochs=30, batch_size=batch_size)
#train(model_1, mse, x_train, y_train, epochs=3)
#train(model_2, mse, x_train, y_train, epochs=3)
           
# Train both models on the same data and print their per-epoch errors side by side.
train_debug(model_single, model_batch, mse, x_train, y_train, epochs=1000,batch_size=batch_size)

1/1000, error_single=0.14995552824223526 , error_batch=0.01500196599676113
2/1000, error_single=0.12762432405392327 , error_batch=0.013071456473037417
3/1000, error_single=0.1086897044598518 , error_batch=0.012121126955214594
4/1000, error_single=0.10096158152538838 , error_batch=0.011019596728542028
5/1000, error_single=0.08788719953603838 , error_batch=0.009996118650742968
6/1000, error_single=0.07941895378447128 , error_batch=0.008973885610363165
7/1000, error_single=0.07389977675151062 , error_batch=0.007825519829844555
8/1000, error_single=0.07031331233634991 , error_batch=0.007229291405009201
9/1000, error_single=0.06835622167904433 , error_batch=0.006976808336242658
10/1000, error_single=0.06563211055523475 , error_batch=0.00676195217227399
11/1000, error_single=0.06381505968543139 , error_batch=0.006593093764013937
12/1000, error_single=0.062083825302390906 , error_batch=0.006431619936216603
13/1000, error_single=0.06048910602896773 , error_batch=0.006293279456195341
14/1000, e

In [None]:
# Mean MSE of the per-sample-trained model over the full test set.
print('error on test set:', test(model_single, mse, x_test, y_test))

error on test set: 0.02971994954095478


In [None]:
# Mean MSE of the batch-trained model over the full test set.
print('error on test set:', test(model_batch, mse, x_test, y_test))

error on test set: 0.031060595854633142
