In [None]:
#
# Project:
#      PyTorch Dojo (https://github.com/wo3kie/ml-dojo)
#
# Author:
#      Lukasz Czerwinski (https://www.lukaszczerwinski.pl/)
#

$$
\begin{pmatrix}
y_0 \\
y_1 \\
\vdots \\
y_{S-1} \\
\end{pmatrix}

=

\begin{pmatrix}
x_{0,0} & x_{0,1} & \cdots & x_{0,F-1} \\
x_{1,0} & x_{1,1} & \cdots & x_{1,F-1} \\
\vdots & \vdots & \ddots & \vdots \\
x_{S-1,0} & x_{S-1,1} & \cdots & x_{S-1,F-1} \\
\end{pmatrix}

\cdot

\begin{pmatrix}
w_0 \\
w_1 \\
\vdots \\
w_{F-1} \\
\end{pmatrix}

+

\begin{pmatrix}
b \\
b \\
\vdots \\
b \\
\end{pmatrix}
$$


In [None]:
from torch import manual_seed, no_grad, randn, Size, Tensor
from torch.nn import Linear, MSELoss
from torch.optim import SGD

import import_ipynb
from common import assert_eq, T # type: ignore


def linear_regression_sgd_autograd(X: Tensor, y: Tensor, epochs: int = 1000, lr: float = 0.1) -> tuple[Tensor, Tensor, Tensor]:
    """
    Fits a linear model `y = X @ w.T + b` by gradient descent, using autograd and the SGD optimizer.

    Every epoch feeds the whole data set as a single batch, so despite the optimizer's
    name this is full-batch gradient descent.

    Parameters:
        X: Input features of shape (S, F)
        y: Target values of shape (S, 1)
        epochs: Number of training epochs; 0 returns the freshly initialised model
        lr: Learning rate

    Returns:
        tuple: (loss, weights, bias)
            loss: mean squared error of the returned weights and bias on (X, y)
            weights: weight of the `Linear` layer, shape (1, F)
            bias: bias of the `Linear` layer, shape (1,)
    """

    (S, F) = X.shape

    assert X.shape == Size([S, F])
    # A target of any other shape would be broadcast against the (S, 1) predictions by MSELoss.
    assert y.shape == Size([S, 1])

    model = Linear(in_features=F, out_features=1, bias=True)
    w = model.weight
    b = model.bias

    assert w.shape == Size([1, F])
    assert b.shape == Size([1])

    # The loss module is stateless, so one instance is shared by all epochs.
    criterion = MSELoss()
    optimizer = SGD(model.parameters(), lr=lr)

    for _ in range(epochs):
        # w.grad = 0
        # b.grad = 0
        optimizer.zero_grad()

        # predicted = X @ w.T + b
        predicted = model(X)

        # error = predicted - y
        # loss = (1/S) * (error ** 2).sum()
        loss = criterion(predicted, y)

        # w.grad += (2/S) * error.T @ X
        # b.grad += (2/S) * error.sum()
        loss.backward()

        # w = w - lr * w.grad
        # b = b - lr * b.grad
        optimizer.step()

    # Evaluate the loss once more after the last update, so that the reported value belongs
    # to the returned parameters and is also defined when `epochs` is 0.
    with no_grad():
        loss = criterion(model(X), y)

    return (loss, w, b)


def _test_linear_regression_sgd_autograd(S: int, W: Tensor, B: "float | Tensor", epochs: int = 1000, lr: float = 0.1, atol: float = 0.01) -> None:
    """
    Tests the linear regression fit by generating synthetic data from known weights and bias,
    and verifies that the fitted weights and bias match them.

    Parameters:
        S: Number of samples
        W: Model's weight(s), a column vector of shape (F, 1)
        B: Model's bias, added to every sample
        epochs: Number of training epochs passed to the fit
        lr: Learning rate passed to the fit
        atol: Absolute tolerance used when comparing the loss, bias and weights
    """

    F = W.shape[0]
    x = randn(S, F)
    assert x.shape == Size([S, F])

    # Noise-free targets, so a converged fit is expected to reach a loss of (almost) zero.
    y = x @ W + B
    assert y.shape == Size([S, 1])

    loss, w, b = linear_regression_sgd_autograd(x, y, epochs, lr)

    # torch.nn.Linear stores its weights as a row vector of shape (1, F),
    # so reshape them to the column vector shape of W before comparing.
    w = w.reshape(W.shape)

    assert_eq(loss, 0.0, atol)
    assert_eq(b, B, atol)
    assert_eq(w, W, atol)


def test_linear_regression_sgd_autograd() -> None:
    """
    Checks that the fit recovers known weights and bias for models with 1, 2 and 3 features.
    """

    # Seed the global RNG so that the random inputs and the random initial model
    # parameters are the same on every run, which makes a failure reproducible.
    manual_seed(0)

    _test_linear_regression_sgd_autograd(10, W=T([[0.1]]), B=0.2)
    _test_linear_regression_sgd_autograd(10, W=T([[0.3], [0.4]]), B=0.5)
    _test_linear_regression_sgd_autograd(200, W=T([[0.6], [0.7], [0.8]]), B=0.9)


# Run the self-test only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    test_linear_regression_sgd_autograd()
