# Neural Network from Scratch

**TODO:**
- Shuffle the dataset
- Split up the dataset into training, validation, and testing

In [1090]:
# Import libraries.
import numpy as np
import pandas as pd
from abc import ABC, abstractmethod

In [1091]:
dataset = pd.read_csv('Concrete_dataset.csv')

**Attirbutes/Features:**
- Cement (kg_in_m3)
- Blast Furnace Slag (kg_in_m3)
- Fly Ash (kg_in_m3)
- Water (kg_in_m3)
- Superplasticizer (kg_in_m3)
- Coarse Aggregate (kg_in_m3)
- Fine Aggregate (kg_in_m3)
- Age (day)

**Labels/Targets:**
- Concrete compressive strength (Mpa)

In [1092]:
dataset.head()

Unnamed: 0,Cement (kg_in_m3),Blast Furnace Slag (kg_in_m3),Fly Ash (kg_in_m3),Water (kg_in_m3),Superplasticizer (kg_in_m3),Coarse Aggregate (kg_in_m3),Fine Aggregate (kg_in_m3),Age (day),Concrete compressive strength (Mpa)
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


Note that small amount of data entries. 1030 might not be enough!

In [1093]:
dataset.describe()

Unnamed: 0,Cement (kg_in_m3),Blast Furnace Slag (kg_in_m3),Fly Ash (kg_in_m3),Water (kg_in_m3),Superplasticizer (kg_in_m3),Coarse Aggregate (kg_in_m3),Fine Aggregate (kg_in_m3),Age (day),Concrete compressive strength (Mpa)
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


# Dataset Management Class

In [1094]:
class DatasetManagement:
    @staticmethod
    def split_into_train_test_valdiation(dataset, test_size, validation_size, random_state):
        # Check input.
        if test_size < 0 or test_size > 1:
            raise ValueError('Test size must be between 0 and 1.')
        if validation_size < 0 or validation_size > 1:
            raise ValueError('Validation size must be between 0 and 1.')
        if test_size + validation_size >= 1:
            raise ValueError('Sum of test size and validation size must be less than 1.')
        # Shuffle the dataset.
        dataset = dataset.sample(frac=1, random_state=random_state)
        # Create train, test, and validation sets.
        train_stop_index = int(len(dataset) * (1 - test_size - validation_size))
        test_stop_index = int(len(dataset) * (1 - validation_size))
        train = dataset[:train_stop_index]
        test = dataset[train_stop_index:test_stop_index]
        validation = dataset[test_stop_index:]
        # Return the sets in a dictionary.
        return [
            train.iloc[:, :-1].values,
            train.iloc[:, -1].values,
            test.iloc[:, :-1].values,
            test.iloc[:, -1].values,
            validation.iloc[:, :-1].values,
            validation.iloc[:, -1].values
        ]
    @staticmethod
    def normalize(dataset):
        # Normalize the dataset.
        mean = np.mean(dataset, axis=0)
        std = np.std(dataset, axis=0)
        return (dataset - mean) / std

In [1095]:
# Noramlize the dataset.
dataset.iloc[:, :-1] = DatasetManagement.normalize(dataset.iloc[:, :-1])
# Split up the data into train, test, and validation sets.
data = DatasetManagement.split_into_train_test_valdiation(dataset, 0.1, 0.1, 42)
[train_X, train_y, test_X, test_y, validation_X, validation_y] = data

1      -0.279733
2       3.553066
3       5.057677
4       4.978487
          ...   
1025   -0.279733
1026   -0.279733
1027   -0.279733
1028   -0.279733
1029   -0.279733
Name: Age (day), Length: 1030, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  dataset.iloc[:, :-1] = DatasetManagement.normalize(dataset.iloc[:, :-1])


# Math

This seciton should hold class with static methods for math functions that are reued.

In [1096]:
class Math:

    # ReLU activation function.
    @staticmethod
    def ReLU(x):
        return np.maximum(0, x)
    @staticmethod
    def dReLU(x):
        return np.where(x <= 0, 0, 1)
    
    # Mean Squared Error loss function.
    @staticmethod
    def MSE(y_pred, y_true):
        return np.mean(np.square(y_pred - y_true))
    def dMSE(y_pred, y_true):
        return 2*np.mean((y_pred - y_true))

# Neuron

In [1097]:
class Neuron:
    
    # Constructor.
    def __init__(self, weights, bias):
        self.weights = weights
        self.bias = bias

    # Calculation method.
    # This is where we apply the weights and bias to the input data.
    #
    # Math:
    # y = w^T*x + b
    def calculate(self, input_data):
        part_y = np.dot(self.weights, input_data) + self.bias
        return part_y
    
    # Adjust weights and bias.
    # This method is called during the backward propagation step.
    def adjust(self, weights_gradient, bias_gradient, learning_rate):
        self.weights -= learning_rate * weights_gradient
        #if (self.bias != self.bias - learning_rate * bias_gradient):
        #    print(f'Bias: {self.bias}')
        #    print(f'Bias Gradient: {bias_gradient:.2f}')
        #    print(f'learning_rate: {learning_rate}')
        #    print(f'New Bias: {(self.bias - learning_rate * bias_gradient):.2}\n')
        self.bias -= learning_rate * bias_gradient

# Layers

- The shape (4, 4) means 4 columns and 4 rows
- The shape (2, 4) means 2 columns and 4 rows
- The shape (1, 2) means 1 column and 2 rows

### Pre-Activation Function & Activation Function
The pre-activation function is handled inside each neuron, and the activation function is applied in the layer.

**Notes:**
- NumPy dotproduct does tranposing for us.

In [1098]:
class LayerAbstract(ABC):
    
    # Set name.
    # Used for setting name based on the type of layer.
    @abstractmethod
    def set_name(self, index):
        pass

    # Forward pass.
    @abstractmethod
    def forward(self, input):
        pass

In [1099]:
class Layer:
    # Constructor.
    def __init__(self):
        self.prev_layer             = None
        self.next_layer             = None
        self.num_of_inputs          = None
        self.name                   = None

In [1100]:
class InputLayer(LayerAbstract, Layer):

    # Constructor.
    # Input shape should be an integer.
    def __init__(self, input_size):

        self.x = None

        # Fix the input size.
        if input_size <= 0:
            raise ValueError("Input size should be a positive integer.")
        self.input_size = input_size

        super().__init__()
    
    # Set name.
    def set_name(self, index):
        self.name = f'Input Layer {str(index)} with shape ({self.input_size})'

    # Forward pass.
    # No math is done here as this is the input layer.
    def forward(self, x):
        self.x = x
        return self.next_layer.forward(x)

In [1101]:
# Notes:
# - The inital weights and bias can be generated in better ways that can simplyfy the convergence of the model.
class DenseLayer(LayerAbstract, Layer):

    # Constructor.
    def __init__(self, num_of_neurons, activation_function):
        
        # Fix the output size.
        if num_of_neurons is not None and num_of_neurons <= 0:
            raise ValueError("Output size should be a positive integer.")
        
        # Set variables.
        self.num_of_neurons                     = num_of_neurons
        self.neurons                            = []
        self.activation_function                = None
        self.derivative_activation_function     = None
        self.a                                  = None
        self.z                                  = None

        # Set activation function.
        self.__set_actionvation_function(activation_function)    

        super().__init__()

    # Set activation function.
    def __set_actionvation_function(self, activation_function):
        activation_function_lower_case = activation_function.lower()
        match activation_function_lower_case:
            case 'relu':
                self.activation_function = Math.ReLU
                self.derivative_activation_function = Math.dReLU
            case _:
                raise ValueError("Activation function not supported.")
    
    # Add neurons.
    def add_neurons(self, num_of_inputs):
        self.num_of_inputs = num_of_inputs # Store for later use.
        for i in range(self.num_of_neurons):
            # Generate random initial weights using He initialization.
            weights = np.random.randn(num_of_inputs) * np.sqrt(2 / num_of_inputs) # He initialization.
            bias = np.random.uniform(-1, 1)
            self.neurons.append(Neuron(weights, bias))
    
    # Set name.
    def set_name(self, index):
        self.name = f'Dense Layer {str(index)} with shape ({self.num_of_neurons}, {self.num_of_inputs})'

    # Forward pass.
    # Apply formula:
    # a = g(y)
    def forward(self, input):
        # Creata a list to store the output of each neuron.
        a = []
        z = []
        for neuron in self.neurons:
            part_z = neuron.calculate(input)
            part_a = self.activation_function(part_z)
            z.append(part_z)
            a.append(part_a)
        # Store the output for back propagation.
        self.z = np.array(z)
        self.a = np.array(a)
        # Pass the output to the next layer (if any).
        if self.next_layer is not None:
            return self.next_layer.forward(a)
        else:
            return a
        
    # Backward pass.
    # This is the learning stage.
    def backward(self, gradient, learning_rate):
        print(f'Gradient: {gradient}')
        # Calculate the gradients.
        if isinstance(self.prev_layer, InputLayer):
            dZdW = gradient * self.prev_layer.x
        else:
            dZdW = gradient * self.prev_layer.a
        dZdB = gradient
        # Send the graidients to the neurons.
        for neuron in self.neurons:
            neuron.adjust(dZdW, dZdB, learning_rate)
        # Pass the gradient to the previous layer (if any).
        if not isinstance(self.prev_layer, InputLayer):
            dZdA = gradient * np.array([neuron.weights for neuron in self.neurons])
            dAdZ = self.prev_layer.derivative_activation_function(self.prev_layer.z)
            # Sum up the result of the dot product as we want to keep the gradient as a single value.
            new_gradient = np.sum(np.dot(dZdA, dAdZ))
            self.prev_layer.backward(new_gradient, learning_rate)

# Model

In [1102]:
class Model:
    
    # Constructor.
    def __init__(self, layers_to_add):
        # Define variables to be used.
        self.head_layer                 = None
        self.tail_layer                 = None
        self.num_of_layers              = 0
        self.cost_function              = None
        self.derivative_cost_function   = None

        # Add layers.
        for layer in layers_to_add:
            self.__add_layer(layer)
        # Print layers.
        self.print_layers()
        # Perform checks.
        self.__check_stucture()

    # Set cost function.
    def set_cost_function(self, cost_function):
        cost_function_lower_case = cost_function.lower()
        match cost_function_lower_case:
            case 'mse':
                self.cost_function = Math.MSE
                self.derivative_cost_function = Math.dMSE
            case _:
                raise ValueError("Cost function not supported")

    # Check for required layers.
    # The model must have a input and output layer.
    def __check_stucture(self):
        if not isinstance(self.head_layer, InputLayer):
            raise ValueError("Model must have an input layer.")
        if not isinstance(self.tail_layer, DenseLayer):
            raise ValueError("Model must have an output layer.")
        print('Model structure is correct.')

    # Add layers.
    def __add_layer(self, layer_to_add):
        # Check the first layer.
        if self.head_layer is None and not isinstance(layer_to_add, InputLayer):
            raise ValueError("First layer should be InputLayer.")

        # Point previous layer to the new layer.
        if self.tail_layer is not None:
            self.tail_layer.next_layer = layer_to_add
            layer_to_add.prev_layer = self.tail_layer

        # Set head layer.
        if self.head_layer is None:
            self.head_layer = layer_to_add

        # Set tail layer.
        self.tail_layer = layer_to_add

        # Add neurons.
        # We have to check the tail layer type ot know how to select the
        # number of neurons we need to add.
        if not isinstance(layer_to_add, InputLayer): # Then we know that the layer has neurons.
            prev_layer = layer_to_add.prev_layer
            num_of_inputs = None
            if isinstance(prev_layer, InputLayer):
                num_of_inputs = prev_layer.input_size
            else:
                num_of_inputs = prev_layer.num_of_neurons
            layer_to_add.add_neurons(num_of_inputs)
        
        # Name the layer.
        layer_to_add.set_name(self.num_of_layers)
        self.num_of_layers += 1

    # Print layers.
    def print_layers(self):
        layer = self.head_layer
        while layer is not None:
            print(layer.name)
            layer = layer.next_layer

    # Predict trough forward propegation.
    # Each layer has it's own forward function,
    # there the math is done.
    def predict(self, inputs):
        # Make sure that the inputs are in the correct format.
        inputs = np.array(inputs)
        # Convert single input to 2d array.
        if (len(inputs.shape) == 1):
            inputs = np.array([inputs])
        # Check if the input size is correct.
        if len(inputs.shape) != 2 or inputs.shape[1] != self.head_layer.input_size:
            raise ValueError("Input size is not correct.")
        # Prepare results.
        predictions = []
        for input in inputs:
            # Pass the input to the first layer.
            pred = self.head_layer.forward(input)
            predictions.append(pred)
        return np.array(predictions).flatten()
    
    # Test function.
    # Return the result of the cost function.
    # x is the attributes and y is the labels.
    # Note that this methods supports both arrays and single values.
    def test(self, inputs, y_true):
        y_pred = self.predict(inputs)
        return self.cost_function(y_pred, y_true)
    
    # Backward propagation function.
    def __backward(self, y_pred, y_true, learning_rate):
        dCdA = self.derivative_cost_function(y_pred, y_true)
        dAdZ = self.tail_layer.derivative_activation_function(self.tail_layer.z)
        gradient = np.sum(dCdA * dAdZ)
        #print(f'dCdA: {dCdA:.2f}')
        #print(f'dAdZ: {dAdZ}')
        #print(f'Gradient: {gradient:.2f}')
        self.tail_layer.backward(gradient, learning_rate)
    
    # Fiting function used for training the model.
    # It's goal is to call the back propegation function
    # a certain amount of times.
    def fit(self, train_X, train_y, validation_X, validation_y, learning_rate, epochs, batch_size):
        for epoch in range(epochs):
            for i in range(len(train_X)):
                # Get input and output.
                input = train_X[i]
                y_true = train_y[i]
                # Forward pass.
                y_pred = self.head_layer.forward(input)
                # Backward pass.
                self.__backward(y_pred, y_true, learning_rate)
            print(f'Epoch: {epoch + 1}, Loss: {self.test(validation_X, validation_y)}') # Here we should use validation set!

In [1103]:
model = Model([
    InputLayer(input_size=8),
    DenseLayer(num_of_neurons=8, activation_function="ReLU"),
    DenseLayer(num_of_neurons=4, activation_function="ReLU"),
    DenseLayer(num_of_neurons=2, activation_function="ReLU"),
    DenseLayer(num_of_neurons=1, activation_function="ReLU"),
])

Input Layer 0 with shape (8)
Dense Layer 1 with shape (8, 8)
Dense Layer 2 with shape (4, 8)
Dense Layer 3 with shape (2, 4)
Dense Layer 4 with shape (1, 2)
Model structure is correct.


In [1104]:
# Set cost function.
model.set_cost_function('MSE')

In [1105]:
# Test model before training.
model.test(test_X, test_y)

1445.6462489431738

In [1106]:
model.head_layer.next_layer.neurons[0].weights

array([-0.07971967, -0.45712251,  0.22861625, -0.24482268, -0.68336428,
        0.73427734,  0.31453663, -0.34108359])

In [1107]:
model.head_layer.next_layer.neurons[1].weights

array([-0.04547322, -0.06993896, -0.09795988, -0.58113678,  0.16773377,
       -0.09538489,  0.71968028, -0.19945427])

In [1108]:
model.fit(train_X, train_y, validation_X, validation_y, 0.1, 100, 32)

Gradient: -105.32316688286772
Gradient: -1409.7646183387055
Gradient: -2493261.2161208224
Gradient: -18430901080152.418
Gradient: 6108712246.731054
Gradient: -1.6459154040373832e+27
Gradient: -5.403478784783488e+59
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
G

Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradient: 0.0
Gradie

KeyboardInterrupt: 

In [None]:
print(model.head_layer.next_layer.neurons[0].bias)
print(model.head_layer.next_layer.neurons[1].bias)

1334224.5149032155
1334224.4872356975


In [None]:
model.tail_layer.prev_layer.neurons[0].weights

array([ 6.92898317, 11.96426214, -0.41290303, -0.94024133])

In [None]:
model.predict(test_X[1])

array([0.])

In [None]:
model.head_layer.x

array([-1.20977562,  1.26979841, -0.84714393,  0.54970019,  0.48490513,
       -1.23164709,  1.17448189, -0.27973311])

In [None]:
print(model.head_layer.next_layer.z)
print(model.head_layer.next_layer.a)

[5796980.63237128 5796979.29677637 5796978.22827943 5796977.16675954
 5796980.85567776 5796979.78487836 5796978.68352477 5796978.45219239]
[5796980.63237128 5796979.29677637 5796978.22827943 5796977.16675954
 5796980.85567776 5796979.78487836 5796978.68352477 5796978.45219239]


In [None]:
model.head_layer.next_layer.neurons[0].weights

array([-1739288.09504695, -1143279.57192085,  2603365.25284044,
        1277268.55073321,   401173.86598026, -2556624.81125705,
        1588664.8151438 ,  -373226.53044031])

In [None]:
model.head_layer.next_layer.neurons[1].weights

array([-1739288.11396728, -1143280.62898307,  2603364.18986395,
        1277267.7923795 ,   401174.31906907, -2556624.2681419 ,
        1588664.9149097 ,  -373226.02946325])