## Loading preprocessed data
Load the data that was preprocessed in Part 1: the car features and the CO2 emission values to be predicted.

In [28]:
import numpy as np
import os
from pathlib import Path

# Loading preprocessed data


# The data/ folder is looked up one level above the current working directory
# (a relative name passed to abspath() is resolved against the cwd, so the
# original two .parent hops land on the cwd's parent).
base_path = Path.cwd().parent

X_train = np.load(base_path.joinpath("data/processed/X_train_preprocessed.npy"))
X_test = np.load(base_path.joinpath("data/processed/X_test_preprocessed.npy"))
y_train = np.load(base_path.joinpath("data/learn/y_train.npy"))
y_test = np.load(base_path.joinpath("data/check/y_test.npy"))


In [29]:

# Report the dimensions of every loaded array, one per line.
print("Shapes:")
for label, array in (
    ("X_train:", X_train),
    ("X_test :", X_test),
    ("y_train:", y_train),
    ("y_test :", y_test),
):
    print(label, array.shape)



Shapes:
X_train: (800, 23)
X_test : (200, 23)
y_train: (800,)
y_test : (200,)


## Activation Functions

In [30]:

# Activation Functions

def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated without overflow.

    The direct formula calls exp(-x), which overflows (and emits a
    RuntimeWarning) for large negative x. Here the exponential is only ever
    taken of a non-positive number, so it stays within (0, 1].

    x: scalar or NumPy array of fields (pre-activations).
    Returns the element-wise sigmoid, with the same shape as x.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # exp of a value <= 0: can underflow to 0, never overflow
    # x >= 0: 1 / (1 + exp(-x))        (identical to the direct formula)
    # x <  0: exp(x) / (1 + exp(x))    (same value, rewritten to avoid exp(+large))
    out = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result back into a scalar and returns real
    # arrays unchanged, so scalar inputs still yield scalar outputs.
    return out[()]

def d_sigmoid(x):
    """Derivative of the sigmoid, written in terms of the sigmoid's output.

    x must already be an activation a = sigmoid(h), NOT the field h itself;
    the derivative is then a * (1 - a).
    """
    complement = 1 - x
    return x * complement



## MULTILAYER WITH BACKPROPAGATION + MOMENTUM
Backpropagation and momentum are combined:
- Backpropagation computes the current gradient, which indicates the direction in which each weight should move to reduce the error.
- Momentum reuses the previous update (stored in `self.d_w_prev` and `self.d_theta_prev`).
- The two terms are added together in the update rules:

    `d_w[i, j] = -eta * delta[i] * xi[j] + alpha * d_w_prev[i, j]`  (gradient term + momentum term)

    and

    `d_theta[i] = eta * delta[i] + alpha * d_theta_prev[i]`  (gradient term + momentum term)

In [31]:
class Multilayer:
    """Fully connected feed-forward network trained online with backpropagation plus momentum.

    Every layer uses the sigmoid activation, so each output lies between 0
    and 1; targets outside that range cannot be matched by the network.

    Per-layer state is kept in lists indexed by layer number l (0 = input
    layer, L - 1 = output layer). Index 0 of `w`, `theta` and the momentum
    buffers is a None placeholder because the input layer has no incoming
    weights.
    """

    def __init__(self, n, eta=0.01, alpha=0.5, seed=None):
        """
        n: list with the number of neurons per layer (e.g. [4, 6, 1])
        eta: learning rate
        alpha: momentum coefficient
        seed: optional integer seed. When given, the initial weights and
              thresholds are drawn from a private generator, so two networks
              built with the same seed start identical. When None (default)
              the global NumPy random state is used.
        """

        self.n = n
        self.L = len(n)
        self.eta = eta
        self.alpha = alpha

        # Fixed activation = sigmoid (df expects activations, not fields)
        self.f = sigmoid
        self.df = d_sigmoid

        # Fields (h), activations (xi), and deltas, one vector per layer
        self.h = [np.zeros(n[l]) for l in range(self.L)]
        self.xi = [np.zeros(n[l]) for l in range(self.L)]
        self.delta = [np.zeros(n[l]) for l in range(self.L)]

        # RandomState(seed) yields the same stream as np.random.seed(seed)
        # followed by np.random.randn, without touching the global state.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Weights w[l] have shape (n[l], n[l-1]); thresholds theta[l] have shape (n[l],)
        self.w = [None]
        self.theta = [None]

        for l in range(1, self.L):
            self.w.append(rng.randn(n[l], n[l-1]) * 0.1)
            self.theta.append(rng.randn(n[l]) * 0.1)

        # Momentum: previous update of every weight and threshold (starts at zero)
        self.d_w_prev = [None] + [np.zeros_like(self.w[l]) for l in range(1, self.L)]
        self.d_theta_prev = [None] + [np.zeros_like(self.theta[l]) for l in range(1, self.L)]

    # FEED-FORWARD
    def feed_forward(self, x):
        """Propagate one input pattern x through the network.

        Stores the field h[l] and activation xi[l] of every layer (they are
        read later by backprop and update) and returns the output-layer
        activation vector.
        """
        self.xi[0] = x

        for l in range(1, self.L):
            # The threshold is SUBTRACTED from the weighted sum.
            self.h[l] = self.w[l] @ self.xi[l-1] - self.theta[l]
            self.xi[l] = self.f(self.h[l])

        return self.xi[-1]

    # BACKPROPAGATION
    def backprop(self, target):
        """Compute the delta of every non-input layer for the last pattern fed forward.

        target: desired output for that pattern.
        Must be called after feed_forward, because it reads the stored activations.
        """
        # Output layer (equation 11)
        self.delta[-1] = self.df(self.xi[-1]) * (self.xi[-1] - target)

        # Hidden layers, from the last hidden layer back to the first
        for l in range(self.L-2, 0, -1):
            self.delta[l] = self.df(self.xi[l]) * (self.w[l+1].T @ self.delta[l+1])

    # WEIGHT UPDATE
    def update(self):
        """Apply one gradient-plus-momentum step to every weight and threshold.

        For each layer l, destination neuron i and origin neuron j:
            d_w[i, j]  = -eta * delta[l][i] * xi[l-1][j] + alpha * d_w_prev[l][i, j]
            d_theta[i] =  eta * delta[l][i]              + alpha * d_theta_prev[l][i]
        The outer product evaluates exactly the same per-weight formula for
        all (i, j) at once instead of looping over them in Python.
        """
        for l in range(1, self.L):
            # d_w has the same shape as w[l]: (n[l], n[l-1])
            d_w = (
                np.outer(-self.eta * self.delta[l], self.xi[l-1])
                + self.alpha * self.d_w_prev[l]
            )

            # The threshold step has the opposite sign to the weight step
            # because the threshold is subtracted in feed_forward.
            d_theta = self.eta * self.delta[l] + self.alpha * self.d_theta_prev[l]

            # Apply updates
            self.w[l] += d_w
            self.theta[l] += d_theta

            # Remember this step for the next momentum term
            self.d_w_prev[l] = d_w
            self.d_theta_prev[l] = d_theta

    # Training
    def train(self, X, y, epochs=2000):
        """Train online (pattern by pattern) for the given number of epochs.

        X: sequence of input patterns; y: matching sequence of targets.
        Prints the error over (X, y) every 200 epochs, starting at epoch 0.
        """
        for epoch in range(epochs):

            # Online training: weights are updated after every single pattern
            for i in range(len(X)):
                self.feed_forward(X[i])
                self.backprop(y[i])
                self.update()

            # Show partial error
            if epoch % 200 == 0:
                print("Epoch", epoch, "Error:", self.error(X, y))

    # Calculate error
    def error(self, X, y):
        """Return half the sum of squared errors over all patterns in (X, y).

        Note: this runs feed_forward on every pattern, so the stored fields
        and activations are overwritten.
        """
        E = 0
        for i in range(len(X)):
            pred = self.feed_forward(X[i])
            E += np.sum((pred - y[i])**2)
        return E / 2



## CREATE AND TRAIN THE NEURAL NETWORK
I adjusted the hyperparameters (hidden-layer size, learning rate `eta`, and momentum `alpha`) to try to make the model learn better.

In [36]:

# CREATE AND TRAIN THE NEURAL NETWORK

# Seed NumPy's global generator before building the network: the initial
# weights are drawn from it, so without a seed every run starts (and ends)
# somewhere different and the reported errors cannot be reproduced.
SEED = 42
np.random.seed(SEED)

n_input = X_train.shape[1]  # one input neuron per preprocessed feature

ml = Multilayer(
    n=[n_input, 12, 1],  # input layer, 12 hidden neurons, single output neuron
    eta=0.07,
    alpha=0.4
)

ml.train(X_train, y_train, epochs=2000)



Epoch 0 Error: 6249067.969317463
Epoch 200 Error: 6249067.004708339
Epoch 400 Error: 6249066.981870297
Epoch 600 Error: 6248928.531752168
Epoch 800 Error: 6248928.515526523
Epoch 1000 Error: 6248928.511036392
Epoch 1200 Error: 6248928.508940865
Epoch 1400 Error: 6248928.507729924
Epoch 1600 Error: 6248928.506941907
Epoch 1800 Error: 6248928.506388568


## Test Evaluation

In [37]:

# Test Evaluation
# Run every test pattern through the trained network, then compare the
# flattened outputs with the flattened targets.
outputs = [ml.feed_forward(sample) for sample in X_test]
predictions = np.array(outputs).flatten()

residuals = predictions - y_test.flatten()
mse = np.mean(residuals**2)

print("\nMSE in test:", mse)






MSE in test: 16978.81186703673


## PREDICTION EXAMPLE

In [38]:

# Pick one already-preprocessed test vector and compare the network output
# with the stored target for the same row.
sample_index = 0
nuevo = X_test[sample_index]
pred = ml.feed_forward(nuevo)

print("\nCO2 Prediction (normalized):", pred)
print("Real value (normalized):", y_test[sample_index])
# Next: NeuralNet, which takes the training percentage as an input value.



CO2 Prediction (normalized): [0.00382502]
Real value (normalized): 0.0


## NeuralNet
`NeuralNet` wraps the multilayer network and takes the percentage of data used for training as an input value. In this case, 80% of the data is used for training.

In [39]:

class NeuralNet:
    """Splits a dataset into training/validation parts and builds a Multilayer network sized to it."""

    def __init__(self, X, y, percentage, layers=[6], eta=0.05, alpha=0.5):
        """
        X: 2-D array of input patterns; X.shape[1] is used as the number of
           input neurons.
        y: array of targets, either 1-D (one value per pattern, giving one
           output neuron) or 2-D (y.shape[1] output neurons).
        percentage: percentage of data used for TRAINING.
                    percentage = 0 means 100% training (no validation set).
                    Values above 1 are read as percents (80 -> 0.8), values
                    up to 1 as fractions; e.g. percentage=0.8 trains on the
                    first 80% of the rows and validates on the rest.
        layers: sizes of the hidden layers, in order.
        eta: learning rate passed to Multilayer.
        alpha: momentum coefficient passed to Multilayer.
        """

        if percentage == 0:
            # All data is used for training; there is no validation set.
            self.X_train = X
            self.y_train = y
            self.X_val = None
            self.y_val = None

        else:
            # Accept both 80 and 0.8 as "80%".
            if percentage > 1:
                p = percentage / 100.0
            else:
                p = percentage

            N = len(X)
            N_train = int(N * p)

            # Ordered split: the first N_train rows train, the remainder validates.
            self.X_train = X[:N_train]
            self.y_train = y[:N_train]
            self.X_val  = X[N_train:]
            self.y_val  = y[N_train:]

        # Dynamic architecture according to input/output dimensions
        n_input = X.shape[1]
        # A 1-D target vector has no second axis: treat it as a single output.
        n_output = 1 if y.ndim == 1 else y.shape[1]
        # list(layers) copies the argument (so the shared default list is never
        # aliased) and lets callers pass a tuple as well.
        architecture = [n_input] + list(layers) + [n_output]

        self.mlp = Multilayer(
            n=architecture,
            eta=eta,
            alpha=alpha
        )


## Conclusions
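The BP model, as configured here, is not suitable for predicting CO2. Despite increasing the number of hidden neurons to 12, training is not effective: learning stagnates after epoch 600 and the total error reduction is minimal (6249067.97 − 6248928.51 ≈ 139.5). The test MSE is also very high (≈ 16978.8), which indicates poor generalization. One caveat: the output neuron is a sigmoid, so predictions are confined to the interval (0, 1); an error of this magnitude suggests the targets may not be scaled to that range, which should be checked before attributing the poor result to backpropagation itself.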