# NN2
Tymoteusz Urban

In [365]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random

In [650]:
class NeuralNetwork:
    """Fully connected feed-forward network trained with gradient descent.

    Layer ``i`` maps its input ``a`` to ``activation(a @ weights[i] + biases[i])``.
    Supported activations are ``'sigmoid'`` and ``'linear'``.
    """

    def __init__(self, layers, weights=None, biases=None, activations=None):
        """Create the network.

        Args:
            layers: Sequence of layer sizes, input layer first.
            weights: Optional list of weight matrices, one per layer
                transition, each of shape ``(layers[i-1], layers[i])``.
                Drawn from a standard normal distribution when omitted.
            biases: Optional list of bias vectors, each of length
                ``layers[i]``. Drawn from a standard normal when omitted.
            activations: Optional list of activation names, one per layer
                transition. Defaults to sigmoid for every hidden layer and
                linear for the output layer.

        Raises:
            ValueError: If an activation name is not supported.
        """
        self.layers = layers
        self.num_layers = len(layers)

        # Xavier initialisation is available through xavier_init /
        # xavier_bias_init but is not used by default.
        if weights is None:
            self.weights = [
                np.random.randn(layers[i - 1], layers[i])
                for i in range(1, self.num_layers)
            ]
        else:
            self.weights = weights

        if biases is None:
            self.biases = [
                np.random.randn(layers[i]) for i in range(1, self.num_layers)
            ]
        else:
            self.biases = biases

        if activations is None:
            self.activations = ['sigmoid'] * (self.num_layers - 2) + ['linear']
        else:
            self.activations = activations

        activation_functions = {
            'sigmoid': self._sigmoid,
            'linear': self._linear,
        }
        # Fail early with a clear message instead of storing None and
        # crashing later with "'NoneType' object is not callable".
        unknown = [
            name for name in self.activations
            if name not in activation_functions
        ]
        if unknown:
            raise ValueError(f"Unsupported activation(s): {unknown}")
        self.activation_funcs = [
            activation_functions[name] for name in self.activations
        ]

    def _sigmoid(self, z):
        """Return the logistic sigmoid of ``z`` element-wise."""
        return 1 / (1 + np.exp(-z))

    def _linear(self, z):
        """Return ``z`` unchanged (identity activation)."""
        return z

    def xavier_init(self, n_in, n_out):
        """Return an ``(n_in, n_out)`` matrix drawn from N(0, 2 / (n_in + n_out))."""
        variance = 2.0 / (n_in + n_out)
        stddev = np.sqrt(variance)
        return np.random.normal(0, stddev, (n_in, n_out))

    def xavier_bias_init(self, n_out):
        """Return a length-``n_out`` vector drawn from N(0, 1 / n_out)."""
        variance = 1.0 / n_out
        stddev = np.sqrt(variance)
        return np.random.normal(0, stddev, n_out)

    def feedforward(self, a, return_activations=False):
        """Propagate ``a`` through every layer.

        Args:
            a: Input array whose last dimension matches the input layer size.
            return_activations: When true, return the list of all layer
                outputs (the input itself first); otherwise return only the
                output of the last layer.
        """
        activations = [a]
        for w, b, func in zip(self.weights, self.biases, self.activation_funcs):
            a = func(np.dot(a, w) + b)
            activations.append(a)
        return activations if return_activations else a

    def sigmoid(self, x):
        """Public alias of the sigmoid activation."""
        return self._sigmoid(x)

    def predict(self, X):
        """Return the network output for ``X``."""
        return self.feedforward(X)

    def mse(self, X, y):
        """Return the mean squared error between ``predict(X)`` and ``y``."""
        predictions = self.predict(X)
        # Align y with the predictions so a 1-D target is not silently
        # broadcast against an (N, 1) prediction into an (N, N) matrix.
        targets = np.asarray(y).reshape(predictions.shape)
        return np.mean((predictions - targets) ** 2)

    def sigmoid_derivative(self, a):
        """Return sigmoid'(z) given ``a = sigmoid(z)``."""
        return a * (1 - a)

    def backward(self, X, y, learning_rate):
        """Run backpropagation and return the partial derivatives.

        Args:
            X: Batch of inputs.
            y: Batch of targets; reshaped to the shape of the network output.
            learning_rate: Unused; kept so existing callers keep working.

        Returns:
            ``(weights_gradient, biases_gradient)``, two lists with one entry
            per layer transition.

        NOTE: weight gradients are summed over the batch while bias
        gradients are averaged. This scaling is kept as it was so that
        learning rates tuned against it stay valid.
        """
        activations = self.feedforward(X, return_activations=True)
        n_transitions = len(self.weights)
        deltas = [None] * n_transitions

        # Output error: prediction minus target.
        output = activations[-1]
        deltas[-1] = output - np.asarray(y).reshape(output.shape)

        # Propagate the error backwards:
        # delta_l = (delta_{l+1} . W_{l+1}^T) * activation'(z_l)
        for i in reversed(range(n_transitions - 1)):
            delta = np.dot(deltas[i + 1], self.weights[i + 1].T)
            if self.activations[i] == "sigmoid":
                delta = delta * self.sigmoid_derivative(activations[i + 1])
            # A linear activation has derivative 1, so delta is unchanged.
            deltas[i] = delta

        # activations has one more entry than deltas; zip pairs each delta
        # with the input of its layer.
        weights_gradient = [
            np.dot(layer_input.T, delta)
            for layer_input, delta in zip(activations, deltas)
        ]
        biases_gradient = [np.mean(delta, axis=0) for delta in deltas]

        return weights_gradient, biases_gradient

    def update_weights(self, X, y, learning_rate):
        """Apply one gradient-descent step, updating parameters in place."""
        weights_gradient, biases_gradient = self.backward(X, y, learning_rate)
        for i in range(len(self.weights)):
            self.weights[i] -= learning_rate * weights_gradient[i]
            self.biases[i] -= learning_rate * biases_gradient[i]

    def create_batches(self, X, y, batch_size):
        """Shuffle ``X`` and ``y`` together and split them into mini-batches.

        Returns:
            ``(X_mini_batches, y_mini_batches)``, two lists of arrays; the
            last batch may be smaller than ``batch_size``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        N = len(X)
        # Index with one shared permutation instead of zipping rows into a
        # single array, which fails when X and y rows differ in shape.
        order = np.random.permutation(N)
        X_shuffled = X[order]
        y_shuffled = y[order]

        X_mini_batches = [X_shuffled[k:k + batch_size] for k in range(0, N, batch_size)]
        y_mini_batches = [y_shuffled[k:k + batch_size] for k in range(0, N, batch_size)]
        return X_mini_batches, y_mini_batches

    def train(self, X, y, learning_rate, epochs, batch_size=None):
        """Train the network, printing the MSE on ``(X, y)`` every 500 epochs.

        Args:
            X: Training inputs.
            y: Training targets.
            learning_rate: Gradient-descent step size.
            epochs: Number of passes over the data.
            batch_size: Mini-batch size. ``None`` (the default) or any value
                not smaller than ``len(X)`` means full-batch training.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        n_samples = len(X)
        # The default is resolved at call time; a ``len(X)`` default would be
        # evaluated once, when the class is defined.
        if batch_size is None:
            batch_size = n_samples
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        for epoch in range(epochs):
            if batch_size < n_samples:
                batches_x, batches_y = self.create_batches(X, y, batch_size)
                for batch_x, batch_y in zip(batches_x, batches_y):
                    self.update_weights(batch_x, batch_y, learning_rate)
            else:
                self.update_weights(X, y, learning_rate)
            if epoch % 500 == 0:
                loss = self.mse(X, y)
                print(f"Epoch {epoch}: Loss = {loss}")

In [362]:
# Load the regression datasets from CSV; index_col=0 uses the first column of
# each file as the row index.
# Note: `square_simple` is read from the *-test.csv file and
# `square_simple_train` from the *-training.csv file.
square_simple = pd.read_csv("../dane/regression/square-simple-test.csv", index_col=0)
square_simple_train = pd.read_csv("../dane/regression/square-simple-training.csv", index_col=0)
steps_large = pd.read_csv("../dane/regression/steps-large-test.csv", index_col=0)

In [656]:
# Network with layer sizes 1 -> 10 -> 1. No weights, biases or activations are
# passed, so parameters are drawn from a standard normal distribution and the
# hidden layer uses sigmoid while the output layer is linear.
mlp = NeuralNetwork([1,10,1])

In [657]:
# Train with learning rate 1e-5 for 20000 epochs using mini-batches of 25 samples.
# NOTE(review): `square_simple` is loaded from square-simple-test.csv, while
# `square_simple_train` is not used in this call — confirm that fitting on the
# test file is intended.
mlp.train(square_simple['x'].to_numpy().reshape(-1, 1), square_simple['y'].to_numpy().reshape(-1, 1), 0.00001, 20000, 25)

Epoch 0: Loss = 8729.975635007862
Epoch 500: Loss = 5685.0014672768
Epoch 1000: Loss = 4985.930265035239
Epoch 1500: Loss = 4561.623910438382
Epoch 2000: Loss = 4186.410473091892
Epoch 2500: Loss = 3825.4982756698946
Epoch 3000: Loss = 3422.737396355571
Epoch 3500: Loss = 3040.423235003524
Epoch 4000: Loss = 2679.7756572515273
Epoch 4500: Loss = 2345.821707913178
Epoch 5000: Loss = 2046.6042677897997
Epoch 5500: Loss = 1785.1975159683102
Epoch 6000: Loss = 1560.537878321971
Epoch 6500: Loss = 1369.3547177770115
Epoch 7000: Loss = 1207.4761411097063
Epoch 7500: Loss = 1070.6083794549556
Epoch 8000: Loss = 954.7437858849828
Epoch 8500: Loss = 856.346536927339
Epoch 9000: Loss = 772.4170752764475
Epoch 9500: Loss = 700.4552983711344
Epoch 10000: Loss = 638.4052179876758
Epoch 10500: Loss = 584.5951708147759
Epoch 11000: Loss = 537.6472365063645
Epoch 11500: Loss = 496.4324693170104
Epoch 12000: Loss = 460.018505260388
Epoch 12500: Loss = 427.6328104751584
Epoch 13000: Loss = 398.637560791