
# Neural Network Code
---


## Layer
The Neural Network is structured in Layers. Each layer contains the matrix $W$ and an activation function $\sigma$. By default $\sigma(x) = LeakyReLU(x)$. The weight matrix $W$ is initialized using the _He-et-el_ initialization and to prevent gradient exploding I use the gradient clipping method.


In [1]:
class Layer:
    ReLU = lambda x :  np.maximum(0, x)
    DE_ReLU = lambda x :  x > 0
    LEAKY_ReLU = lambda x : np.where(x > 0, x, x * 0.01)
    DE_LEAKY_ReLU = lambda x :  np.where(x > 0, 1, 0.01)
    LINEAR = lambda x: x
    DE_LINEAR = lambda x: 1
    SIGMOID = lambda x: 1/(1+np.exp(x))
    DE_SIGMOID = lambda x: Layer.SIGMOID(x)*(1-Layer.SIGMOID(x))
    DELTA_THRESHOLD = 5
    
    def __init__(self, input_size, output_size, ALPHA=.1):
        #He-et-al Initialization.
        self.W = np.random.randn(input_size, output_size) * np.sqrt(2 / output_size) 
        self.bias = np.random.randn(1, output_size) * np.sqrt(2 / output_size)
        # default activation function is Leaky ReLU
        self.SIGMA =  Layer.LEAKY_ReLU 
        self.DE_SIGMA = Layer.DE_LEAKY_ReLU
        
        self.input = None
        self.z = None
        self.ALPHA = ALPHA
        
    def forward(self, a):
        self.input = a # saves input for backprop later
        self.z = a.dot(self.W) + self.bias
        return self.SIGMA(self.z)
    
    def backprop(self, output_error):
        output_error = self.DE_SIGMA(self.z) * output_error
        delta = output_error @ self.W.T # delta for the next level
        delta = Layer.gradient_clipping(delta) # to prevent gradient exploding

        dW = self.input.T.dot(output_error)
        dB = output_error

        self.W -= self.ALPHA * dW
        self.bias -= self.ALPHA * dB
        return delta
    
    @staticmethod
    def gradient_clipping( delta ):
        return np.where(abs(delta)<Layer.DELTA_THRESHOLD, delta, abs(delta)/delta*Layer.DELTA_THRESHOLD)
    
    def __str__(self):
        return f"Layer {self.W.shape[0]}x{self.W.shape[1]}."


## Neural Network class
Since this excercise is about regression, the last layer uses $linear$ activation rather then the default $LeakyReLU$.
The user may use the initialization parameters `hidden_layers, epochs, batch_size, learning_rate` in order to adjust the hyperparameters of the algorithm. 


In [2]:
class NeuralNetwork:    
    def __init__(self, input_size, output_size=1, hidden_layers=[4,2], epochs=1000, batch_size=20, learning_rate=0.1):
        l = [input_size] + hidden_layers + [output_size]
        self.layers = [Layer(l[i], l[i+1], ALPHA=learning_rate) for i in range(len(l)-1)]
        #last 2 layers have linear activation (regression)
        self.layers[-1].SIGMA = Layer.LINEAR 
        self.layers[-1].DE_SIGMA = Layer.DE_LINEAR
        self.EPOCH = epochs
        self.BATCH = batch_size
        
        
    def fit(self, training_set, target_set):
        err = 1
        epoch=0
        while (epoch<self.EPOCH and err>0.001):
            batch = np.random.choice(len(training_set), size=self.BATCH, replace=False)
            X = training_set[batch]
            Y = target_set[batch]
            err=0
            for (x,y) in zip(X,Y):
                x = x.reshape(1,x.size)
                h = self.predict(x)
                # print(x, y, h)
                err += (y-h)**2 
            
            
                delta = (h-y)
                for layer in self.layers[::-1]:
                    delta = layer.backprop(delta)
                    
            epoch+=1
            if (epoch+1)%(self.EPOCH/10) == 0:
                err /= self.BATCH
                print(f"epoch {epoch+1} error rate is {err}")
                # for l in self.layers:
                #     print(l.W)
                
        print(f"Error is {err} at epoch {epoch+1}")
    
    def predict(self, x):
        output = x
        for layer in self.layers:
            output = layer.forward(output)
            
        return output 
    
    def __str__(self):
        return "\n".join([f"{str(L)}\nW is:\n{L.W}\nB is: {L.bias} " for L in self.layers])


## Utility
Some utility functions to evaluate the Neural Network and prepare the dataset

In [3]:
import numpy as np
def random_permutation(size, *arrays):
    seed = np.random.permutation(size)
    ret = list()

    for a in arrays:
        ret.append(a[seed])
    
    return tuple(ret)

def partition_dataset(percentage, x, y):
    size = len(x)
    training_size = round(percentage*size)
    x,y = random_permutation(size, x,y)
    
    return x[:training_size], y[:training_size], x[training_size:], y[training_size:]

def evaluate(nn, data, target):
    count = 0
    error = 0
    for (x,y) in zip(data, target):
        p = nn.predict(x)[0][0]
        # print(x, y, p)
        error += abs(y-p)/data.shape[0]
        if (round(p) == y):
            count += 1
    print(f"Accuracy: {round(count/len(data)*100, 2)}%, error rate = {error}")
    return count/len(data)

# Model Evaluation
---

## XOR
Accuracy is decent, even though it heavily depends on the initial values of $W_i$

In [4]:
XOR_train = np.array([[0,0], [0,1], [1,0], [1,1]])
XOR_target = np.array([[0], [1], [1], [0]])
        
nn_XOR = NeuralNetwork(XOR_train[0].shape[0], hidden_layers=[16,4], batch_size=3, epochs=1000, learning_rate=0.01)
nn_XOR.fit(XOR_train, XOR_target)
acc=evaluate(nn_XOR, XOR_train, XOR_target)


epoch 100 error rate is [[0.21001296]]
epoch 200 error rate is [[0.18193575]]
epoch 300 error rate is [[0.14103276]]
epoch 400 error rate is [[0.09420524]]
epoch 500 error rate is [[0.04609542]]
epoch 600 error rate is [[0.01023063]]
epoch 700 error rate is [[0.00434453]]
Error is [[0.00093082]] at epoch 733
Accuracy: 100.0%, error rate = [0.03482724]


## Iris
Due to a much larger dataset, Iris results are much more cleaner, consistent and less dependent on the initial $W_i$

In [5]:
from sklearn.datasets import load_iris
dataset = load_iris()
TRAINING_RATIO = .4

data_train, target_train, data_validate, target_validate = partition_dataset(TRAINING_RATIO, dataset.data, dataset.target)


nn = NeuralNetwork(data_train[0].shape[0], hidden_layers=[7,10,3], learning_rate=0.01)
nn.fit(data_train, target_train)
acc = evaluate(nn, data_validate, target_validate)



epoch 100 error rate is [[0.23550599]]
epoch 200 error rate is [[0.05275016]]
epoch 300 error rate is [[0.08158702]]
epoch 400 error rate is [[0.05524922]]
epoch 500 error rate is [[2.12595406]]
epoch 600 error rate is [[0.05333887]]
epoch 700 error rate is [[0.06819046]]
epoch 800 error rate is [[0.0213874]]
epoch 900 error rate is [[0.06546229]]
epoch 1000 error rate is [[0.045872]]
Error is [[1.41223645]] at epoch 1001
Accuracy: 98.89%, error rate = 0.1594247612529447
