In [None]:
#
# Project:
#      PyTorch Dojo (https://github.com/wo3kie/ml-dojo)
#
# Author:
#      Lukasz Czerwinski (https://www.lukaszczerwinski.pl/)
#

$$ \text{Model} = M(W, b) = Wx+b $$
$$ \frac{\partial M}{\partial W} = x $$
$$ \frac{\partial M}{\partial b} = 1 $$
$$ \\[2em] $$
$$ \text{Sigmoid} = S(M) = \frac{e^M}{e^M+1} $$
$$ \frac{\partial S}{\partial M} = \frac{e^M}{(e^M+1)^2} = S(1-S) $$
$$ \\[2em] $$
$$ \text{Loss} = L(S) = -(y\ln(S)+(1-y)\ln(1-S)) $$
$$ \frac{\partial L}{\partial S} = -\Big(y \frac{1}{S} + (1-y) \frac{1}{1-S}(-1) \Big) = \frac{S-y}{S(1-S)} $$
$$ \frac{\partial L}{\partial W} = \frac{\partial L}{\partial S} \frac{\partial S}{\partial M} \frac{\partial M}{\partial W} = \frac{S-y}{S(1-S)} \, S(1-S) \, x = (S-y)x $$
$$ \frac{\partial L}{\partial b} = \frac{\partial L}{\partial S} \frac{\partial S}{\partial M} \frac{\partial M}{\partial b} = \frac{S-y}{S(1-S)} \, S(1-S) \, 1 = S-y $$

In [None]:
from torch import exp, rand, Tensor

import import_ipynb
from common import Patient, T # type: ignore


def _Linear(X: Tensor, w: Tensor, b: Tensor) -> Tensor:
    """
    Apply the affine map X @ w.T + b.

    Parameters:
        X: Input features
        w: Weights; transposed before the matrix product
        b: Bias, added to the product with broadcasting

    Returns:
        The result of the matrix product with the bias added.
    """

    projection = X.matmul(w.T)

    return projection + b


def _Model(X: Tensor, w: Tensor, b: Tensor) -> Tensor:
    """
    Evaluate the model, which is the plain linear map implemented by _Linear.

    Parameters:
        X: Input features
        w: Weights
        b: Bias

    Returns:
        Whatever _Linear returns for the same arguments.
    """

    logits = _Linear(X, w, b)

    return logits


def _Sigmoid(m: Tensor) -> Tensor:
    """
    Compute the logistic sigmoid element-wise.

    Parameters:
        m: Input values (logits)

    Returns:
        A tensor of the same shape as m with values in [0, 1].
    """

    #
    # exp(m) / (exp(m) + 1) overflows to inf / inf = NaN for large positive m.
    # The algebraically identical form below saturates instead: exp(-m) becomes 0
    # for large positive m (result 1) and inf for large negative m (result 0).
    #

    return 1 / (1 + exp(-m))


def _BinaryCrossEntropy(S: Tensor, y: Tensor) -> Tensor:
    """
    Compute the mean binary cross-entropy between predicted probabilities and targets.

    Parameters:
        S: Predicted probabilities, expected to lie in [0, 1]
        y: Target values, broadcastable against S

    Returns:
        A scalar tensor with the loss averaged over all elements.
    """

    #
    # log(0) is -inf and 0 * -inf is NaN, so a single saturated prediction
    # (S exactly 0 or 1) would turn the whole mean into NaN or inf.
    # Clamping the logs at -100 keeps every term finite; torch.nn.BCELoss
    # uses the same bound.
    #

    log_S = S.log().clamp(min=-100)
    log_1_minus_S = (1 - S).log().clamp(min=-100)

    return -((y * log_S) + ((1 - y) * log_1_minus_S)).mean()


def _Loss(S: Tensor, y: Tensor) -> Tensor:
    """
    Evaluate the training loss, which is the binary cross-entropy from _BinaryCrossEntropy.

    Parameters:
        S: Predicted probabilities
        y: Target values

    Returns:
        Whatever _BinaryCrossEntropy returns for the same arguments.
    """

    loss = _BinaryCrossEntropy(S, y)

    return loss


def logistic_regression_sgd_gradient(
        X: Tensor, 
        y: Tensor, 
        epochs=2000, 
        lr=0.1
) -> tuple[float, callable]:
    """
    Perform logistic regression using gradient descent with manual gradient calculation.

    Despite "sgd" in the name, every epoch performs one parameter update computed
    from all of X and y; no mini-batches are sampled.

    Parameters:
        X: Input features of shape (S, F)
        y: Target values of shape (S, 1)
        epochs: Number of training epochs; zero or a negative value skips training
        lr: Learning rate

    Returns:
        A tuple containing the loss of the final parameters on (X, y) and a prediction function
        that takes new input data and returns predicted probabilities.

    Raises:
        ValueError: If y does not have shape (S, 1).
    """

    (s, f) = X.shape

    #
    # A target of shape (S,) would broadcast against the (S, 1) predictions
    # into an (S, S) matrix. Reject it explicitly rather than relying on the
    # shape asserts below, which are stripped when Python runs with -O.
    #

    if y.shape != (s, 1):
        raise ValueError(f"y must have shape ({s}, 1), got {tuple(y.shape)}")

    w = rand(1, f)
    assert w.shape == (1, f)

    b = rand(1)
    assert b.shape == (1,)

    for _ in range(epochs):
        M = _Model(X, w, b)
        assert M.shape == (s, 1)

        S = _Sigmoid(M)
        assert S.shape == (s, 1)

        residual = S - y

        #
        # NOTE(review): the weight gradient below is summed over the samples,
        # while the bias gradient is averaged. Dividing dL_dw by s would match
        # the mean loss, but it also shrinks the effective weight step by a
        # factor of s, so epochs/lr would need re-tuning. Left as is.
        #

        dL_dw = residual.T @ X
        assert dL_dw.shape == (1, f)

        dL_db = residual.mean()
        assert dL_db.shape == ()

        w = w - lr * dL_dw
        b = b - lr * dL_db

    #
    # In the autograd version, computing the loss is required because it serves 
    # as the root of the computational graph for backpropagation. 
    #
    # In the manual-gradient version, the loss value is not needed for the weight update itself,
    # so it is computed once, after training, purely for reporting. Doing it here also
    # keeps L defined when the loop body never runs (epochs <= 0).
    #

    L = _Loss(_Sigmoid(_Model(X, w, b)), y)

    return (L.item(), lambda x: _Sigmoid(_Model(x, w, b)))


def _test_logistic_regression_sgd_gradient(SL, ST, sick_threshold=0.5, epochs=2000, lr=0.1) -> None:
    """
    Train the logistic regression on generated patients and check it on a separate test set.

    Two datasets are built from Patient(sick_threshold).data rows. In every row the last
    column is used as the label and the remaining columns as features. The model is trained
    on the first dataset and then scored sample by sample on the second one.
    The test asserts that achieved accuracy is at least 90%.

    Parameters:
        SL: Number of samples in the training dataset
        ST: Number of samples in the test dataset
        sick_threshold: Value passed to every Patient that is created
        epochs: Number of training epochs for the logistic regression model
        lr: Learning rate for the logistic regression model
    """

    #
    # Dataset generation (training set first, then the test set)
    #

    training_data = T([Patient(sick_threshold).data for _ in range(SL)])
    test_data = T([Patient(sick_threshold).data for _ in range(ST)])

    # Every column except the last one is a feature.
    f = training_data.shape[1] - 1

    #
    # Training
    #

    train_X = training_data[:, :-1]
    train_X[:, 0] /= 100 # Data scaling to make training numerically stable
    assert train_X.shape == (SL, f)

    train_y = training_data[:, -1].unsqueeze(1)
    assert train_y.shape == (SL, 1)

    (_, model) = logistic_regression_sgd_gradient(train_X, train_y, epochs, lr)

    #
    # Testing
    #

    correct = 0

    for row in test_data:
        features = row[:-1]
        features[0] /= 100 # The same data scaling as during training.
        assert features.shape == (f,)

        label = row[-1]
        assert label.shape == ()

        probability = model(features)

        predicted_sick = bool(probability >= T(0.5))
        actually_sick = bool(label == T(1))

        # A sample counts when it is a true positive or a true negative.
        if predicted_sick == actually_sick:
            correct += 1

    accuracy = correct / ST
    assert accuracy >= 0.9


def test_logistic_regression_sgd_gradient():
    """
    Run the accuracy test with 80 training and 20 test samples for several sick thresholds.
    """

    for sick_threshold in (0.1, 0.3, 0.5, 0.7, 0.9):
        _test_logistic_regression_sgd_gradient(80, 20, sick_threshold)


# Run the whole test when this file is executed directly rather than imported.
if __name__ == "__main__":
    test_logistic_regression_sgd_gradient()