## Применение алгоритма обратного распространения ошибки для решения задач регрессии

In [9]:
import numpy as np

In [10]:
class ReLU:
    """Rectified linear unit activation, applied element-wise."""

    def function(self, x):
        """Return ``x`` with every negative entry replaced by zero."""
        return np.maximum(x, 0)

    def derivative(self, x):
        """Return a boolean result that is True where ``x`` is strictly positive.

        The booleans behave as 1/0 when multiplied into a numeric gradient.
        """
        is_positive = x > 0
        return is_positive


class Linear:
    """Identity activation: the output equals the input."""

    def function(self, x):
        """Return ``x`` itself, unmodified."""
        passthrough = x
        return passthrough

    def derivative(self, x):
        """Return the constant slope 1, regardless of ``x``.

        The scalar broadcasts when multiplied into an array gradient.
        """
        slope = 1
        return slope


class MSE:
    """Mean squared error cost."""

    def function(self, y_pred, y_true):
        """Return the mean of the squared differences over all elements."""
        return np.mean((y_pred - y_true) ** 2)

    def derivative(self, y_pred, y_true):
        """Return the gradient of :meth:`function` with respect to ``y_pred``.

        ``function`` averages over every element, so each partial derivative
        carries a ``1 / size`` factor. For a single-element prediction the
        factor is 1 and the result equals ``2 * (y_pred - y_true)``.
        """
        residual = y_pred - y_true
        return 2 * residual / np.size(residual)

In [11]:
class Layer:
    """Fully connected layer that accumulates gradients between parameter updates.

    The backward-pass methods operate on the values cached by the most recent
    ``forward`` call and are written for one sample at a time.
    """

    def __init__(self, in_neurons_count, out_neurons_count, activation):
        """Create the parameters and zeroed gradient accumulators.

        ``activation`` must expose ``function(x)`` and ``derivative(x)``.
        """
        self.in_neurons_count = in_neurons_count
        self.out_neurons_count = out_neurons_count
        self.activation = activation

        weights_shape = (in_neurons_count, out_neurons_count)

        # Normal draws scaled by 1/sqrt(number of inputs); biases start at zero.
        self.weights = np.random.normal(size=weights_shape) / np.sqrt(in_neurons_count)
        self.biases = np.zeros(out_neurons_count)

        self.weights_grad = np.zeros(weights_shape)
        self.biases_grad = np.zeros(out_neurons_count)

    def forward(self, inputs):
        """Compute the layer output and cache the values backprop needs."""
        self.inputs = inputs
        pre_activation = inputs @ self.weights + self.biases
        self.unactivated_outputs = pre_activation
        self.activated_outputs = self.activation.function(pre_activation)
        return self.activated_outputs

    def calculate_dQdu_as_output_layer(self, y_true, cost):
        """Store dQ/du for the last layer, starting from the cost derivative."""
        # dQ/da: derivative of the cost with respect to the activated output.
        dQda = cost.derivative(self.activated_outputs, y_true)
        # Chain rule through the activation gives dQ/du (unactivated output).
        activation_slope = self.activation.derivative(self.unactivated_outputs)
        self.dQdu = dQda * activation_slope

    def calculate_dQdu_as_hidden_layer(self, next_layer):
        """Store dQ/du for a hidden layer from the layer that follows it."""
        # dQ/da: the following layer's dQ/du carried back through its weights.
        dQda = next_layer.weights @ next_layer.dQdu
        # Chain rule through the activation gives dQ/du (unactivated output).
        activation_slope = self.activation.derivative(self.unactivated_outputs)
        self.dQdu = dQda * activation_slope

    def update_gradients(self):
        """Add the current sample's contribution to the gradient accumulators."""
        # dQ/dW is the outer product of the cached inputs and dQ/du.
        self.weights_grad += np.outer(self.inputs, self.dQdu)
        self.biases_grad += self.dQdu

    def apply_and_reset_gradients(self, learning_rate):
        """Take one gradient-descent step, then clear the accumulators."""
        self.weights -= learning_rate * self.weights_grad
        self.biases -= learning_rate * self.biases_grad
        # Fresh zero arrays so the next batch starts from a clean slate.
        self.weights_grad = np.zeros_like(self.weights_grad)
        self.biases_grad = np.zeros_like(self.biases_grad)

In [12]:
class NN:
    """Feed-forward network: an ordered sequence of layers trained with backprop."""

    def __init__(self, layers):
        """Keep the given layer sequence (first element is the input-side layer)."""
        self.layers = layers

    def forward(self, inputs):
        """Feed ``inputs`` through every layer in order and return the final output."""
        signal = inputs
        for layer in self.layers:
            signal = layer.forward(signal)
        return signal

    def backpropagation(self, y_true, cost):
        """Accumulate gradients in every layer for the most recent forward pass."""
        # The last layer starts the chain from the cost derivative.
        last_layer = self.layers[-1]
        last_layer.calculate_dQdu_as_output_layer(y_true, cost)
        last_layer.update_gradients()

        # Walk the remaining layers back to front; each one reads dQ/du from
        # the layer that comes after it in the forward direction.
        downstream = last_layer
        for layer in self.layers[-2::-1]:
            layer.calculate_dQdu_as_hidden_layer(downstream)
            layer.update_gradients()
            downstream = layer

    def train_batch(self, batch_x, batch_y, cost, learning_rate):
        """Run one update step on a batch, processing one sample at a time."""
        for sample, target in zip(batch_x, batch_y):
            self.forward(sample)
            self.backpropagation(target, cost)

        # Gradients were summed over the batch; dividing the step size by the
        # number of rows in batch_x turns the sum into an average.
        for layer in self.layers:
            layer.apply_and_reset_gradients(learning_rate / batch_x.shape[0])

#### Загрузка датасета

In [13]:
!wget -O data.txt.zip https://archive.ics.uci.edu/ml/machine-learning-databases/00203/YearPredictionMSD.txt.zip

#### Обработка датасета

In [14]:
#"""
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tqdm.notebook import tqdm

# Headerless CSV: column 0 is the regression target (presumably the release
# year, given the dataset name), every other column is a feature.
df = pd.read_csv('data.txt.zip', header=None)
X = df.iloc[:, 1:].values.astype('float32')
y = df.iloc[:, [0]].values.astype('float32')  # list indexer keeps y two-dimensional

# The first `train_size` rows form the training set, the remainder the test set.
train_size = 463715
train_rows = slice(None, train_size)
test_rows = slice(train_size, None)

# Both scalers are fitted on training rows only; the feature scaler is then
# reused, unchanged, on the test rows.
sc = StandardScaler()
sc_target = StandardScaler()
X_train = sc.fit_transform(X[train_rows, :])
X_test = sc.transform(X[test_rows, :])
y_train = sc_target.fit_transform(y[train_rows])
# Test targets stay in their original units; predictions are mapped back with
# sc_target.inverse_transform before being compared against them.
y_test = y[test_rows]
#"""

df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,81,82,83,84,85,86,87,88,89,90
0,2001,49.94357,21.47114,73.0775,8.74861,-17.40628,-13.09905,-25.01202,-12.23257,7.83089,...,13.0162,-54.40548,58.99367,15.37344,1.11144,-23.08793,68.40795,-1.82223,-27.46348,2.26327
1,2001,48.73215,18.4293,70.32679,12.94636,-10.32437,-24.83777,8.7663,-0.92019,18.76548,...,5.66812,-19.68073,33.04964,42.87836,-9.90378,-32.22788,70.49388,12.04941,58.43453,26.92061
2,2001,50.95714,31.85602,55.81851,13.41693,-6.57898,-18.5494,-3.27872,-2.35035,16.07017,...,3.038,26.05866,-50.92779,10.93792,-0.07568,43.2013,-115.00698,-0.05859,39.67068,-0.66345
3,2001,48.2475,-1.89837,36.29772,2.58776,0.9717,-26.21683,5.05097,-10.34124,3.55005,...,34.57337,-171.70734,-16.96705,-46.67617,-12.51516,82.58061,-72.08993,9.90558,199.62971,18.85382
4,2001,50.9702,42.20998,67.09964,8.46791,-15.85279,-16.81409,-12.48207,-9.37636,12.63699,...,9.92661,-55.95724,64.92712,-17.72522,-1.49237,-7.50035,51.76631,7.88713,55.66926,28.74903


#### Создание модели

In [15]:
# Seed NumPy's global RNG so the randomly drawn initial weights are reproducible.
np.random.seed(42)

# Consecutive width pairs define the layers: features -> 32 -> 8 -> 1.
# The two hidden layers use ReLU; the output layer is linear.
layer_widths = [X_train.shape[1], 32, 8, 1]
layer_activations = [ReLU(), ReLU(), Linear()]
model = NN([
    Layer(fan_in, fan_out, activation)
    for fan_in, fan_out, activation in zip(layer_widths, layer_widths[1:], layer_activations)
])

cost_func = MSE()
learning_rate = 5e-3

batch_size = 32
# Integer ceiling division: a trailing partial batch still counts as a batch.
batches_count = (X_train.shape[0] + batch_size - 1) // batch_size

#### Обучение модели

In [16]:
from sklearn.metrics import mean_squared_error

# Report test RMSE about five times per epoch; the interval does not change
# inside the loops, so compute it once.
report_every = int(np.ceil(batches_count / 5))

for epoch in range(4):
    batch_starts = range(0, X_train.shape[0], batch_size)
    for batch_index, batch_start_index in enumerate(tqdm(batch_starts, f'Epoch {epoch}')):
        batch_x = X_train[batch_start_index:batch_start_index + batch_size, :]
        batch_y = y_train[batch_start_index:batch_start_index + batch_size, :]
        model.train_batch(batch_x, batch_y, cost_func, learning_rate)

        if batch_index % report_every == 0:
            # Predictions are in standardized target units; map them back to
            # the original units before comparing with the unscaled y_test.
            test_pred = sc_target.inverse_transform(model.forward(X_test))
            # Take the square root explicitly instead of passing squared=False:
            # that parameter was deprecated and later removed from scikit-learn.
            # For this single-output target the value is the same.
            print('RMSE on test:', np.sqrt(mean_squared_error(y_test, test_pred)))

Epoch 0:   0%|          | 0/14492 [00:00<?, ?it/s]

RMSE on test: 12.664008045099035
RMSE on test: 9.85827844248504
RMSE on test: 9.352224778076339
RMSE on test: 9.239214708773277
RMSE on test: 9.1379865311595


Epoch 1:   0%|          | 0/14492 [00:00<?, ?it/s]

RMSE on test: 9.121120902011464
RMSE on test: 9.193855201672555
RMSE on test: 9.105957313645762
RMSE on test: 9.068807036595937
RMSE on test: 9.020488546311297


Epoch 2:   0%|          | 0/14492 [00:00<?, ?it/s]

RMSE on test: 9.045160202109473
RMSE on test: 9.118188079225346
RMSE on test: 9.045328072754822
RMSE on test: 9.01973606231758
RMSE on test: 8.986081847865291


Epoch 3:   0%|          | 0/14492 [00:00<?, ?it/s]

RMSE on test: 9.00856045092611
RMSE on test: 9.082702899936438
RMSE on test: 9.015131949426548
RMSE on test: 8.988978231941774
RMSE on test: 8.9647322615182


## Та же модель на PyTorch

In [17]:
import torch
from torch import nn

# Same topology as the NumPy model: features -> 32 -> 8 -> 1, with ReLU after
# each hidden layer and a plain linear output.
n_features = X_train.shape[1]
torch_model = nn.Sequential(
    nn.Linear(n_features, 32), nn.ReLU(),
    nn.Linear(32, 8), nn.ReLU(),
    nn.Linear(8, 1),
)

# Plain SGD with the learning rate shared with the NumPy model; MSE loss.
optimizer = torch.optim.SGD(params=torch_model.parameters(), lr=learning_rate)
criterion = nn.functional.mse_loss

In [18]:
# Report test RMSE about five times per epoch; the interval and the test
# tensor do not change inside the loops, so build them once.
report_every = int(np.ceil(batches_count / 5))
X_test_tensor = torch.as_tensor(X_test, dtype=torch.float32)

for epoch in range(4):
    batch_starts = range(0, X_train.shape[0], batch_size)
    for batch_index, batch_start_index in enumerate(tqdm(batch_starts, f'Epoch {epoch}')):
        batch_stop_index = batch_start_index + batch_size
        batch_x = torch.as_tensor(X_train[batch_start_index:batch_stop_index, :], dtype=torch.float32)
        batch_y = torch.as_tensor(y_train[batch_start_index:batch_stop_index, :], dtype=torch.float32)

        y_pred = torch_model(batch_x)
        loss = criterion(y_pred, batch_y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch_index % report_every == 0:
            # Evaluation needs no gradients; no_grad avoids building an
            # autograd graph over the entire test set.
            with torch.no_grad():
                test_pred = torch_model(X_test_tensor).numpy()
            # Map predictions from standardized units back to the original
            # target units before comparing with the unscaled y_test.
            test_pred = sc_target.inverse_transform(test_pred)
            # Take the square root explicitly instead of passing squared=False:
            # that parameter was deprecated and later removed from scikit-learn.
            print('RMSE on test:', np.sqrt(mean_squared_error(y_test, test_pred)))

Epoch 0:   0%|          | 0/14492 [00:00<?, ?it/s]

RMSE on test: 11.350731
RMSE on test: 9.691548
RMSE on test: 9.280447
RMSE on test: 9.156673
RMSE on test: 9.075457


Epoch 1:   0%|          | 0/14492 [00:00<?, ?it/s]

RMSE on test: 9.079317
RMSE on test: 9.150316
RMSE on test: 9.066064
RMSE on test: 9.023626
RMSE on test: 8.985299


Epoch 2:   0%|          | 0/14492 [00:00<?, ?it/s]

RMSE on test: 9.015455
RMSE on test: 9.079324
RMSE on test: 9.010445
RMSE on test: 8.997958
RMSE on test: 8.955013


Epoch 3:   0%|          | 0/14492 [00:00<?, ?it/s]

RMSE on test: 8.982645
RMSE on test: 9.044628
RMSE on test: 8.986022
RMSE on test: 8.968505
RMSE on test: 8.935136
