In [None]:
#
# Project:
#      PyTorch Dojo (https://github.com/wo3kie/ml-dojo)
#
# Author:
#      Lukasz Czerwinski (https://www.lukaszczerwinski.pl/)
#

$$ loss(w, b) = L(w, b) = \frac{1}{S} \sum_{i=1}^{S} (y_i - (x_i w + b)) ^ 2 $$
$$ \frac{\partial L}{\partial w} = \frac{1}{S} \sum_{i=1}^{S} 2 \, (y_i - (x_i w + b)) \, (-x_i) = $$
$$ \frac{2}{S} \sum_{i=1}^{S} ((x_i w + b) - y_i) \, x_i = $$
$$ \frac{2}{S} \sum_{i=1}^{S} (predicted_i - y_i) \, x_i = $$
$$ \frac{2}{S} \sum_{i=1}^{S} error_i \, x_i = $$
$$ \frac{2}{S} \, x^T \cdot error $$

$$ loss(w, b) = L(w, b) = \frac{1}{S} \sum_{i=1}^{S} (y_i - (x_i w + b)) ^ 2 $$
$$ \frac{\partial L}{\partial b} = \frac{1}{S} \sum_{i=1}^{S} 2 \, (y_i - (x_i w + b)) \, (-1) = $$
$$ \frac{2}{S} \sum_{i=1}^{S} ((x_i w + b) - y_i) = $$
$$ \frac{2}{S} \sum_{i=1}^{S} (predicted_i - y_i) = $$
$$ \frac{2}{S} \sum_{i=1}^{S} error_i $$

In [None]:
from torch import full, manual_seed, randn, Size, Tensor

import import_ipynb
from common import equal, T # type: ignore


def linear_regression_sgd_gradient(
        X: Tensor,
        y: Tensor,
        epochs: int = 1000,
        lr: float = 0.1
) -> tuple[Tensor, Tensor, Tensor]:
    """
    Fits the linear model `X @ w + b` to `y` by gradient descent on the mean squared error,
    using hand-derived gradients instead of autograd.

    Note: despite "sgd" in the name, every epoch uses all S samples at once
    (full-batch gradient descent); no mini-batches are sampled.

    Parameters:
        X: Input features of shape (S, F)
        y: Target values of shape (S, 1)
        epochs: Number of training epochs; with 0 (or less) the randomly
                initialised parameters are returned unchanged
        lr: Learning rate

    Returns:
        tuple: (loss, weights, bias)
            loss: Scalar MSE measured at the start of the last epoch, i.e. before the
                  final parameter update (without any epoch: MSE of the initial parameters)
            weights: Tensor of shape (F, 1)
            bias: Tensor of shape (1,)
    """

    # Unpacking doubles as the rank check: anything but a 2-D `X` raises here.
    (S, F) = X.shape

    assert y.shape == Size([S, 1])

    w = randn((F, 1))
    b = randn(1)

    # Evaluate the starting point once so that `loss` is bound even when the loop
    # below never runs; otherwise the `return` would hit an unassigned local.
    loss = 1/S * ((X @ w + b - y) ** 2).sum()
    assert loss.shape == Size([])

    for _ in range(epochs):
        predicted = X @ w + b
        assert predicted.shape == Size([S, 1])

        error = predicted - y
        assert error.shape == Size([S, 1])

        # Both gradients are taken from the same `error`, i.e. at the same (w, b),
        # before either parameter is moved.
        dL_dW = (2/S) * X.T @ error
        assert dL_dW.shape == Size([F, 1])

        dL_db = (2/S) * error.sum()
        assert dL_db.shape == Size([])

        w = w - lr * dL_dW
        assert w.shape == Size([F, 1])

        b = b - lr * dL_db
        assert b.shape == Size([1])

        #
        # In the autograd version, computing the loss is required because it serves
        # as the root of the computational graph for backpropagation.
        #
        # In the manual-gradient version, the loss value is not needed for the weight update itself,
        # it is computed only to monitor training progress.
        #

        loss = 1/S * (error ** 2).sum()
        assert loss.shape == Size([])

    return (loss, w, b)


def _test_linear_regression_sgd_gradient(S: int, W: Tensor, B: Tensor, epochs=1000, lr=0.1) -> None:
    """
    Round-trip check for `linear_regression_sgd_gradient`: draws random inputs, derives the
    targets from the given ground-truth parameters, fits the model and asserts (through the
    project's `equal` helper) that the loss matches 0.0 and the ground truth is recovered.

    Parameters:
        S: Number of samples
        W: Ground-truth weight(s); the first dimension is taken as the feature count
        B: Ground-truth bias, added to every target
        epochs: Number of training epochs, forwarded to the fit
        lr: Learning rate, forwarded to the fit
    """

    feature_count = W.shape[0]

    inputs = randn(S, feature_count)
    assert inputs.shape == Size([S, feature_count])

    # Noise-free targets, so an exact fit exists.
    targets = inputs @ W + B
    assert targets.shape == Size([S, 1])

    fitted = linear_regression_sgd_gradient(inputs, targets, epochs, lr)
    (final_loss, fitted_w, fitted_b) = fitted

    assert equal(final_loss, 0.0)
    assert equal(fitted_b, B)
    assert equal(fitted_w, W)


def test_linear_regression_sgd_gradient() -> None:
    """
    Runs the parameter-recovery check on 10 samples each for models with
    one, two and three features.
    """

    # (ground-truth weights, ground-truth bias)
    cases = (
        ([[0.1]], 0.2),
        ([[0.3], [0.4]], 0.5),
        ([[0.6], [0.7], [0.8]], 0.9),
    )

    for weights, bias in cases:
        _test_linear_regression_sgd_gradient(10, W=T(weights), B=bias)
    

# Run the self-test only when this code is executed as the main program,
# not when it is imported from elsewhere.
if __name__ == "__main__":
    test_linear_regression_sgd_gradient()
