In [None]:
#
# Project:
#      PyTorch Dojo (https://github.com/wo3kie/ml-dojo)
#
# Author:
#      Lukasz Czerwinski (https://www.lukaszczerwinski.pl/)
#

$$ SE = (\text{actual}_1 - \text{predicted}_1)^2 + (\text{actual}_2 - \text{predicted}_2)^2 + \dots + (\text{actual}_n - \text{predicted}_n)^2 $$
$$ SE = (y_1 - (ax_1+b))^2 + (y_2 - (ax_2+b))^2 + \dots + (y_n - (ax_n+b))^2 $$
$$ SE = \sum_{i=1}^{n} \left( y_i^2 - 2y_i(ax_i + b) + (ax_i + b)^2 \right) $$
$$ SE = \sum_{i=1}^{n} y_i^2 - 2a \sum_{i=1}^{n} x_i y_i - 2b \sum_{i=1}^{n} y_i + a^2 \sum_{i=1}^{n} x_i^2 + 2ab \sum_{i=1}^{n} x_i + nb^2 $$
$$ E(x) = \frac{1}{n}\sum_{i=1}^{n} x_i $$
$$ E(x^2) = \frac{1}{n}\sum_{i=1}^{n} x_i^2 $$
$$ E(y) = \frac{1}{n}\sum_{i=1}^{n} y_i $$
$$ E(y^2) = \frac{1}{n}\sum_{i=1}^{n} y_i^2 $$
$$ E(xy) = \frac{1}{n}\sum_{i=1}^{n} x_i y_i $$
$$ SE = nE(y^2) - 2anE(xy) - 2bnE(y) + a^2 nE(x^2) + 2ab nE(x) + nb^2 $$
$$ \frac{\partial SE}{\partial a} = -2nE(xy) + 2anE(x^2) + 2bnE(x) = 0 $$
$$ \frac{\partial SE}{\partial b} = -2nE(y) + 2anE(x) + 2bn = 0 $$
$$ a = \frac{E(x)E(y) - E(xy)}{E(x)^2 - E(x^2)} $$
$$ b = E(y) - aE(x) $$

In [None]:
from torch import arange, float32, randn, round, tensor


def linear_regression_1d_cf(x, y):
    """
    Fits the line ``y = slope * x + intercept`` to one-dimensional data by least squares,
    using the closed-form solution.

    The slope is the covariance of ``x`` and ``y`` divided by the variance of ``x``. Both
    are computed from mean-centred data. This is algebraically identical to
    ``(E(x)E(y) - E(xy)) / (E(x)^2 - E(x^2))``, but it avoids subtracting two large,
    nearly equal numbers, which destroys the result in float32 when the values of ``x``
    are large compared to their spread.

    Parameters:
        x (Tensor): 1-D tensor of inputs (a dtype supported by ``mean``, i.e. floating point).
        y (Tensor): 1-D tensor of targets with the same shape as ``x``.

    Returns:
        tuple: ``(slope, intercept)``, both 0-dim tensors.

    Raises:
        AssertionError: If ``x`` or ``y`` is not 1-D, or if their shapes differ.

    Note:
        When ``x`` has zero variance (all values equal, or a single sample) the slope is
        the result of dividing zero by zero, i.e. NaN, and so is the intercept.
    """

    assert(x.dim() == 1)
    assert(y.dim() == 1)

    # Without this check a length-1 tensor would silently broadcast against the other one.
    assert(x.shape == y.shape)

    x_mean = x.mean()
    y_mean = y.mean()

    # Deviations from the mean; all further statistics are built from these small numbers.
    dx = x - x_mean
    dy = y - y_mean

    cov_xy = (dx * dy).mean()
    var_x = (dx * dx).mean()

    slope = cov_xy / var_x
    assert(slope.dim() == 0)

    # The fitted line always passes through the point (mean(x), mean(y)).
    intercept = y_mean - slope * x_mean
    assert(intercept.dim() == 0)

    return (slope, intercept)


def _test_linear_regression_1d_cf(A, B, S, M, N, seed=None):
    """
    Tests the linear regression closed-form solution by generating synthetic data with a known slope and intercept,
    adding noise, and verifying that the computed slope and intercept round to the original values.

    The inputs are ``S`` evenly spaced points ``0, M/S, 2M/S, ...`` (``M`` itself is excluded). The noise is drawn
    from a standard normal distribution and scaled by ``N`` percent of ``M``.

    Because the estimates are rounded to the nearest integer before being compared, the check is only meaningful
    for integer-valued ``A`` and ``B``.

    Parameters:
        A (float): Model's slope
        B (float): Model's intercept
        S (int): Number of samples
        M (float): Upper bound of x (exclusive)
        N (float): Noise standard deviation, as a percentage of M
        seed (int, optional): Seed for a private random generator used to draw the noise. When None (default)
            the global random generator is used, so the noise differs from run to run.

    Raises:
        AssertionError: If a generated tensor has an unexpected shape, or a rounded estimate differs from A or B.
    """

    generator = None
    if seed is not None:
        # Local import: the name is needed only when a seed is requested.
        from torch import Generator
        generator = Generator().manual_seed(seed)

    noise = N * (M / 100.0) * randn((S, ), dtype=float32, generator=generator)
    assert(noise.shape == (S, ))

    # Scale integer indices instead of calling arange with a fractional step: with a non-integer step the
    # number of elements depends on floating-point rounding and may differ from S.
    x = arange(S, dtype=float32) * (M / S)
    assert(x.shape == (S, ))

    y = (A * x + B) + noise
    assert(y.shape == (S, ))

    (slope, intercept) = linear_regression_1d_cf(x, y)
    assert round(slope) == tensor(A), f"slope {slope.item()} does not round to {A}"
    assert round(intercept) == tensor(B), f"intercept {intercept.item()} does not round to {B}"


def test_linear_regression_1d_cf():
    """
    Runs the closed-form regression check on a fixed set of lines.

    Three (slope, intercept) pairs are tried with positive and then with negative signs, first without noise
    and then with a noise level assigned to each pair. Every run uses 100 samples on the interval [0, 10).
    """

    lines = ((1.0, 1.0), (2.0, 3.0), (3.0, 6.0))
    noise_levels = (3.0, 6.0, 9.0)
    signs = (1.0, -1.0)

    # Noise-free data: positive lines first, then their negated counterparts.
    for sign in signs:
        for (slope, intercept) in lines:
            _test_linear_regression_1d_cf(A=sign * slope, B=sign * intercept, S=100, M=10.0, N=0.0)

    # Noisy data: the k-th line is paired with the k-th noise level.
    for sign in signs:
        for (slope, intercept), level in zip(lines, noise_levels):
            _test_linear_regression_1d_cf(A=sign * slope, B=sign * intercept, S=100, M=10.0, N=level)


# Entry point: run all checks when this code is executed as the main module.
# A failed check surfaces as an AssertionError raised from the test helper.
if __name__ == "__main__":
    test_linear_regression_1d_cf()