In [1]:
import numpy as np

# One-hot ground-truth labels: row i carries its single 1 in column i,
# so sample 1 -> class 1, sample 2 -> class 2, sample 3 -> class 3.
y_true = np.eye(3, dtype=int)

# Raw, un-normalised model scores (logits); one row per sample,
# one column per class.
logits = np.array(
    [
        [2.0, 1.0, 0.1],  # Sample 1
        [0.5, 2.5, 0.2],  # Sample 2
        [0.1, 0.2, 3.0],  # Sample 3
    ]
)

# Softmax function
def softmax(x, axis=-1):
    """Return the softmax of ``x`` along ``axis``.

    Parameters
    ----------
    x : array_like
        Input scores (logits).
    axis : int, optional
        Axis along which the values are normalised. Defaults to ``-1``
        (the last axis), which is the behaviour of the original
        single-argument form.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` whose entries along ``axis``
        are non-negative and sum to 1.
    """
    # Subtracting the maximum along the axis does not change the result
    # mathematically, but it keeps np.exp from overflowing on large logits.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

# Step 1: turn the raw logits into per-sample class probabilities
probs = softmax(logits)
print("Softmax Probabilities:\n", probs)

# Step 2: element-wise natural logarithm of those probabilities
# NOTE: a probability that underflows to exactly 0 would give -inf here,
# and 0 * -inf in the next step is nan.
log_probs = np.log(probs)
print("\nLog of Softmax Probabilities:\n", log_probs)

# Step 3: mask the log-probabilities with the one-hot labels so that only
# the true-class entry of each row stays non-zero
y_true_log_probs = np.multiply(y_true, log_probs)
print("\ny_true * log(probs):\n", y_true_log_probs)

# Step 4: per-sample cross-entropy = negated sum over the class axis
ce_loss = -y_true_log_probs.sum(axis=-1)
print("\nCE Loss per Sample:", ce_loss)

# Step 5: dataset-level loss = mean of the per-sample losses
avg_ce_loss = ce_loss.mean()
print("\nAverage CE Loss for the Dataset:", avg_ce_loss)

Softmax Probabilities:
 [[0.65900114 0.24243297 0.09856589]
 [0.10953053 0.80932725 0.08114221]
 [0.04931133 0.05449744 0.89619123]]

Log of Softmax Probabilities:
 [[-0.41703002 -1.41703002 -2.31703002]
 [-2.21155193 -0.21155193 -2.51155193]
 [-3.00960146 -2.90960146 -0.10960146]]

y_true * log(probs):
 [[-0.41703002 -0.         -0.        ]
 [-0.         -0.21155193 -0.        ]
 [-0.         -0.         -0.10960146]]

CE Loss per Sample: [0.41703002 0.21155193 0.10960146]

Average CE Loss for the Dataset: 0.24606113655369521
