In [None]:
#
# Project:
#      PyTorch Dojo (https://github.com/wo3kie/ml-dojo)
#
# Author:
#      Lukasz Czerwinski (https://www.lukaszczerwinski.pl/)
#

$$ H(X, Y) = \sum_{e \in \mathcal{E}} P(X=e) \, \log_2 \left( \frac{1}{P(Y=e)} \right) $$
$$ \\[2em]$$
$$ H(X, Y) = \text{The average number of bits per symbol when symbols occur with the probabilities of X but are encoded with the codes of Y.} $$
$$ \\[2em] $$
$$ \text{Example} $$
$$ \text{X: 8/10 "S", 2/10 "RAIN"} $$
$$ \text{X: "S", "RAIN", "S", "S", "S", "S", "S", "RAIN", "S", "S", ...} $$
$$ \\[2em] $$
$$ \text{Y: 3/10 "SUN", 7/10 "R"} $$
$$ \text{Y: "R", "SUN", "R", "R", "R", "SUN", "R", "SUN", "R", "R", ...} $$
$$ \\[2em]$$
$$ X=\Bigg[ sunny=\frac{8}{10}, rainy=\frac{2}{10} \Bigg] $$
$$ Y=\Bigg[ sunny=\frac{3}{10}, rainy=\frac{7}{10} \Bigg] $$
$$ \\[2em]$$
$$ H( X, Y) = P(X=\text{sunny}) \, \log_2 \Bigg( \frac{1}{P(Y=\text{sunny})} \Bigg) + P(X=\text{rainy}) \, \log_2 \Bigg( \frac{1}{P(Y=\text{rainy})} \Bigg) $$
$$ \text{cross-entropy} = H(X, Y) = \frac{8}{10} \cdot \log_2 \frac{10}{3} + \frac{2}{10} \cdot \log_2 \frac{10}{7} = 1.492 $$
$$ \text{entropy} = H(X, X) = \frac{8}{10} \cdot \log_2 \frac{10}{8} + \frac{2}{10} \cdot \log_2 \frac{10}{2} = 0.722 $$
$$ \text{With non-optimal coding, we use } 1.492 - 0.722 = 0.770 \text{ more bits per symbol, on average, than necessary.} $$

In [None]:
import torch

import import_ipynb
from common import assert_eq, assert_ne, T # type: ignore
import common # type: ignore

def _cross_entropy_d(dist_x, dist_y):
    """
    Cross-entropy H(X, Y), in bits, of two distributions given as tensors.

    Evaluates -sum(dist_x * log2(dist_y)).

    dist_x: torch.Tensor with the probabilities the symbols actually occur with.
    dist_y: torch.Tensor with the probabilities the codes are derived from;
            its sum is checked against 1.0 with atol=0.01.

    Returns the result as a Python scalar (via .item()).
    """

    # Both arguments have to be tensors already; conversion is the caller's job.
    for dist in (dist_x, dist_y):
        assert_eq(isinstance(dist, torch.Tensor), True)

    # Only dist_y is checked for being normalised.
    total_y = dist_y.sum().item()
    assert_eq(total_y, 1.0, atol=0.01)

    # Clamp from below so that a zero probability does not yield log2(0) = -inf.
    log_y = dist_y.clamp(1e-10).log2()
    weighted = dist_x * log_y

    return -weighted.sum().item()


def cross_entropy(iterable_x, iterable_y, count=False, norm=False):
    """
    Calculates the cross-entropy H(X, Y), in bits, of two distributions
    (count=False) or of two vectors with samples (count=True).

    iterable_x, iterable_y: the two inputs; each is converted with T() first.
    count: when truthy, the inputs are treated as samples. Each one is replaced
           by element [1] of common.count(...) (presumably the per-value
           counts -- confirm against common.count) and then normalised.
    norm:  when truthy, each input is divided by its own sum before use.

    Returns whatever _cross_entropy_d returns for the prepared inputs.
    """

    dist_x = T(iterable_x)
    dist_y = T(iterable_y)

    if count:
        # NOTE(review): the two inputs are counted independently, so this
        # presumably assumes both samples contain the same set of values -- confirm.
        dist_x = common.count(dist_x)[1]
        dist_y = common.count(dist_y)[1]

    # Counts are never a distribution, so count implies normalisation.
    if count or norm:
        dist_x = dist_x / dist_x.sum()
        dist_y = dist_y / dist_y.sum()

    return _cross_entropy_d(dist_x, dist_y)


def test_cross_entropy_1():
    """Checks cross-entropy and entropy for X = [8/10, 2/10], Y = [3/10, 7/10]."""

    dist_x = [8/10, 2/10]
    dist_y = [3/10, 7/10]

    # H(X, Y) = cross-entropy(X, Y) = how many bits, on average, using probabilities from X and codes from Y
    assert_eq(cross_entropy(dist_x, dist_y), 1.492, atol=0.01)

    # H(X) = H(X, X) = entropy(X) = optimal code length for X in bits
    assert_eq(cross_entropy(dist_x, [8/10, 2/10]), 0.722, atol=0.01)


def test_cross_entropy_2():
    """Checks cross-entropy on uniform distributions and on raw sample vectors."""

    # Rows: (distribution x, distribution y, expected cross-entropy in bits).
    distribution_cases = (
        ([1], [1], 0.0),
        ([1/2, 1/2], [1/2, 1/2], 1.0),
        ([1/4, 1/4, 1/4, 1/4], [1/4, 1/4, 1/4, 1/4], 2.0),
    )
    for dist_x, dist_y, expected in distribution_cases:
        assert_eq(cross_entropy(dist_x, dist_y), expected, atol=0.01)

    # Rows: (samples x, samples y, expected cross-entropy in bits); the samples are counted first.
    sample_cases = (
        ([1, 0, 1, 0, 1, 0, 1, 0], [1, 1, 1, 1, 1, 1, 1, 1], 0.000),
        ([1, 0, 1, 0, 1, 0, 1, 0], [1, 1, 1, 1, 1, 1, 0, 0], 1.207),
        ([1, 0, 1, 0, 1, 0, 1, 0], [1, 1, 1, 1, 0, 0, 0, 0], 1.000),
        ([1, 0, 1, 0, 1, 0, 1, 0], [1, 1, 0, 0, 0, 0, 0, 0], 1.207),
    )
    for samples_x, samples_y, expected in sample_cases:
        assert_eq(cross_entropy(samples_x, samples_y, count=True), expected, atol=0.01)


def test_cross_entropy_3():
    """Compares cross-entropy with mean squared error as the prediction drifts from the actual distribution."""

    #
    # Cross-entropy, although commonly used as a loss function, is not symmetric and does not
    # constitute a distance metric: it is non-zero even for two identical inputs.
    #
    same_samples = cross_entropy([0, 0, 0, 0, 1, 1, 1, 1], [0, 0, 0, 0, 1, 1, 1, 1], count=True, norm=True)
    assert_ne(same_samples, 0.000)

    # Every distribution below is laid out as [probability of rain, probability of sun].
    # The actual distribution stays fixed while the predicted one moves away from it.
    actual = [0.1, 0.9]
    predictions = (
        [0.1, 0.9],
        [0.2, 0.8],
        [0.3, 0.7],
        [0.4, 0.6],
        [0.5, 0.5],
        [0.6, 0.4],
        [0.7, 0.3],
        [0.8, 0.2],
        [0.9, 0.1],
    )

    # Expected cross-entropy(actual, predicted), one value per prediction above.
    expected_cross_entropy = (0.468, 0.521, 0.636, 0.795, 1.000, 1.263, 1.614, 2.121, 3.004)
    for predicted, expected in zip(predictions, expected_cross_entropy):
        assert_eq(cross_entropy(actual, predicted, norm=True), expected, atol=0.01)

    def mean_squared_error(x, y):
        # Mean of the element-wise squared differences, as a Python scalar.
        return ((T(x) - T(y)) ** 2).mean().item()

    # Expected mean_squared_error(actual, predicted), one value per prediction above.
    expected_mse = (0.000, 0.009, 0.039, 0.089, 0.159, 0.249, 0.359, 0.489, 0.639)
    for predicted, expected in zip(predictions, expected_mse):
        assert_eq(mean_squared_error(actual, predicted), expected, atol=0.01)


if __name__ == "__main__":
    # Run every test, in order, when this file is executed as a script.
    for run_test in (test_cross_entropy_1, test_cross_entropy_2, test_cross_entropy_3):
        run_test()