In [None]:
#
# Project:
#      PyTorch Dojo (https://github.com/wo3kie/ml-dojo)
#
# Author:
#      Lukasz Czerwinski (https://www.lukaszczerwinski.pl/)
#

$$ \text{Model} = M(W, b) = Wx+b $$
$$ \frac{\partial M}{\partial W} = x $$
$$ \frac{\partial M}{\partial b} = 1 $$
$$ \\[2em] $$
$$ \text{Sigmoid} = S(M) = \frac{e^M}{e^M+1} $$
$$ \frac{\partial S}{\partial M} = \frac{e^M}{(e^M+1)^2} = S(1-S) $$
$$ \\[2em] $$
$$ \text{Loss} = L(S) = -(y\ln(S)+(1-y)\ln(1-S)) $$
$$ \frac{\partial L}{\partial S} = -\Big(y \frac{1}{S} + (1-y) \frac{1}{1-S}(-1) \Big) = \frac{S-y}{S(1-S)} $$
$$ \frac{\partial L}{\partial W} = \frac{\partial L}{\partial S} \frac{\partial S}{\partial M} \frac{\partial M}{\partial W} = \frac{S-y}{S(1-S)} \, S(1-S) \, x = (S-y)x $$
$$ \frac{\partial L}{\partial b} = \frac{\partial L}{\partial S} \frac{\partial S}{\partial M} \frac{\partial M}{\partial b} = \frac{S-y}{S(1-S)} \, S(1-S) \, 1 = S-y $$

In [None]:
from torch import exp, rand
from torch.optim import SGD

import import_ipynb
from common import Patient, T # type: ignore


def _Model(X, w, b):
    """
    Linear model M(W, b) = X W^T + b.

    Parameters:
        X: Input features; the training code in this file passes shape [S, F],
           the returned predictor is also called with a single [F] vector.
        w: Weights; created as shape [1, F] by the training code.
        b: Bias; created as shape [1] by the training code.

    Returns:
        The affine scores, one per row of X.
    """

    weight_columns = w.T
    scores = X @ weight_columns
    return scores + b


def _Sigmoid(m):
    """
    Logistic sigmoid S(m) = e^m / (e^m + 1), evaluated as 1 / (1 + e^(-m)).

    Both forms are algebraically identical, but the literal e^m / (e^m + 1)
    becomes inf / inf = NaN as soon as e^m overflows (for float32 that happens
    for m above roughly 88). In the form used here an overflowing e^(-m) yields
    1 / inf = 0 and an underflowing one yields 1 / 1 = 1, so the result
    saturates at 0 or 1 and never turns into NaN.

    Parameters:
        m: Tensor of model outputs (any shape).

    Returns:
        Tensor of the same shape with values in [0, 1].
    """

    return 1 / (1 + exp(-m))


def _Loss(S, y):
    """
    BCE Loss - Binary Cross-Entropy Loss reduced by mean.

    L(S) = -(y ln(S) + (1 - y) ln(1 - S)), averaged over all elements.

    The logarithms are clamped to be >= -100 (the same convention that
    torch.nn.BCELoss uses). Without the clamp a saturated prediction
    (S exactly 0 or 1) produces ln(0) = -inf, and the term that should vanish
    becomes 0 * -inf = NaN, so even a perfect prediction would report NaN.

    Parameters:
        S: Predicted probabilities in [0, 1].
        y: Target labels, broadcastable against S.

    Returns:
        Scalar tensor with the mean loss.
    """

    log_positive = S.log().clamp(min=-100)
    log_negative = (1 - S).log().clamp(min=-100)

    return -((y * log_positive) + ((1 - y) * log_negative)).mean()


def logistic_regression_sgd_gradient(X, y, epochs=2000, lr=0.01):
    """
    Perform logistic regression using gradient descent with manual gradient computation.

    Every epoch uses all rows of X (full-batch updates). Weights and bias are
    initialised with uniform random values from `rand`.

    Parameters:
        X: Input features (shape: [S, F])
        y: Target labels (shape: [S, 1])
        epochs: Number of training iterations; 0 is allowed and returns the
                randomly initialised model together with its loss.
        lr: Learning rate

    Returns:
        A tuple containing the final loss and a prediction function that takes new input data and returns predicted probabilities.
        The loss is evaluated with the final parameters, i.e. it describes the very model that the prediction function uses.
    """

    (_, n_features) = X.shape

    w = rand(1, n_features)
    b = rand(1)

    for _ in range(epochs):
        # S - y, the common factor of both gradients: dL/dW = (S - y) x, dL/db = S - y.
        residual = _Sigmoid(_Model(X, w, b)) - y

        # NOTE: the gradients are summed over the samples, whereas _Loss is
        # reduced by mean. The update is therefore S times the gradient of the
        # mean loss, i.e. the effective step size is lr * S. This scale is kept
        # on purpose so that existing lr values keep behaving the same.
        dL_dw = residual.T @ X
        dL_db = residual.sum()

        w = w - lr * dL_dw
        b = b - lr * dL_db

    # Evaluate the loss once, after training: this avoids a wasted loss
    # computation in every epoch, matches the returned parameters, and is
    # defined even when epochs == 0.
    final_loss = _Loss(_Sigmoid(_Model(X, w, b)), y)

    return (final_loss, lambda x: _Sigmoid(_Model(x, w, b)))


def _test_logistic_regression_sgd_gradient(SL, ST, sick_threshold=0.5, epochs=2000, lr=0.01):
    """
        End-to-end check of the logistic regression implementation on generated patient data.

        SL + ST patients are generated; the first SL rows are used for training and the
        following ST rows for testing. Each row holds 5 features followed by the label
        in the last column (label 1 is treated as "sick").
        The test asserts that neither the number of false positives nor the number of
        false negatives exceeds 5% of the ST test samples.

        Parameters:
            SL: Number of training samples
            ST: Number of test samples
            sick_threshold: Forwarded to Patient(...); its exact meaning is defined in `common`
            epochs: Number of training iterations
            lr: Learning rate
    """

    sample_count = int(SL + ST)
    patients = T([Patient(sick_threshold).data for _ in range(sample_count)])

    training_rows = patients[0: SL]
    test_rows = patients[SL: SL + ST]

    #
    # Training
    #

    features = training_rows[:, :-1].clone()
    assert features.shape == (SL, 5)

    labels = training_rows[:, -1].clone().unsqueeze(1)
    assert labels.shape == (SL, 1)

    # Crude scaling of the first feature; keeps the training numerically more stable.
    features[:, 0] /= 100

    (_, predict) = logistic_regression_sgd_gradient(features, labels, epochs, lr)

    #
    # Testing
    #

    false_positives = 0
    false_negatives = 0

    for row in test_rows:
        sample = row[:-1].clone()
        assert sample.shape == (5,)

        label = row[-1].clone()
        assert label.shape == ()

        # Apply exactly the scaling that was used for the training data.
        sample[0] /= 100

        probability = predict(sample)

        predicted_sick = bool(probability >= T(0.5))
        actually_sick = bool(label == T(1))

        if predicted_sick and not actually_sick:
            false_positives += 1
        elif actually_sick and not predicted_sick:
            false_negatives += 1

    assert false_positives / ST <= 0.05
    assert false_negatives / ST <= 0.05


def test_logistic_regression_sgd_gradient():
    """
    Run the logistic regression test with 80 training and 20 test samples
    for each of the sick thresholds 0.1, 0.3, 0.5, 0.7 and 0.9.
    """

    for sick_threshold in (0.1, 0.3, 0.5, 0.7, 0.9):
        _test_logistic_regression_sgd_gradient(80, 20, sick_threshold)


# Run the tests when this file is executed directly rather than imported.
if __name__ == "__main__":
    test_logistic_regression_sgd_gradient()