In [1]:
import torch
from d2l import torch as d2l

In [2]:
def softmax(X):
    """Row-wise softmax of a 2-D tensor.

    Args:
        X: Tensor whose dimension 1 indexes the entries to normalise
            (each row is normalised independently).

    Returns:
        Tensor of the same shape as ``X`` whose rows are non-negative and
        sum to 1.
    """
    # Shift each row by its maximum before exponentiating. Softmax is
    # invariant to a per-row constant shift, and this keeps every exponent
    # <= 0 so exp() cannot overflow to inf (which would yield nan after
    # the division).
    row_max = X.max(dim=1, keepdim=True).values
    X_exp = torch.exp(X - row_max)
    partition = X_exp.sum(1, keepdim=True)
    return X_exp / partition

In [4]:
# Sanity check: apply softmax to a single row [1, 2, 3] and print the
# resulting probabilities (the entries should be positive and sum to 1).
X = torch.tensor([[1,2,3]])
X_prob = softmax(X)
print(X_prob)

tensor([[0.0900, 0.2447, 0.6652]])


In [7]:
class SoftmaxRegressionScratch(d2l.Classifier):
    """Softmax regression with explicitly managed weight and bias tensors.

    Args:
        num_input: Number of input features; sets the number of rows of ``W``.
        num_output: Number of outputs; sets the number of columns of ``W``
            and the length of ``b``.
        lr: Not used directly in this class; presumably recorded by
            ``save_hyperparameters()`` for the training loop - confirm
            against d2l.
        sigma: Standard deviation of the zero-mean normal distribution used
            to initialise ``W``.
    """

    def __init__(self, num_input, num_output, lr, sigma = 0.01):
        super().__init__()
        self.save_hyperparameters()
        self.W = torch.normal(0, sigma, size=(num_input, num_output), requires_grad=True)
        # `size=` must be a sequence of ints: passing a bare int by keyword
        # raises TypeError, so wrap num_output in a 1-tuple.
        self.b = torch.zeros(size=(num_output,), requires_grad=True)

    def parameters(self):
        """Return the trainable tensors as the tuple ``(W, b)``."""
        return (self.W, self.b)

In [8]:
@d2l.add_to_class(SoftmaxRegressionScratch)
def forward(self, X):
    """Map a batch of inputs to per-class probabilities.

    ``X`` is reshaped to two dimensions with ``self.W.shape[0]`` columns,
    passed through the affine map ``X W + b``, and normalised row-wise
    with ``softmax``.
    """
    num_features = self.W.shape[0]
    flat = X.reshape(-1, num_features)
    logits = flat @ self.W + self.b
    return softmax(logits)

In [10]:
def cross_entropy(y_hat, y):
    """Mean negative log of the entries of ``y_hat`` selected by ``y``.

    Args:
        y_hat: Indexable by ``[row, column]``; row ``i`` supplies the
            candidate values for example ``i``.
        y: Column index to pick from each row of ``y_hat``.

    Returns:
        Scalar tensor: the negated mean of ``log(y_hat[i, y[i]])`` over rows.
    """
    row_ids = list(range(len(y_hat)))
    # Pair every row index with its label to pick one entry per row.
    picked = y_hat[row_ids, y]
    mean_log = torch.log(picked).mean()
    return -mean_log

In [11]:
# Worked example: y gives the column to pick from each row of y_hat, so the
# selected entries are 0.1 (row 0, column 0) and 0.5 (row 1, column 2).
y = torch.tensor([0,2])
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.2, 0.3, 0.5]])

# Loss is the mean of -log(0.1) and -log(0.5).
loss = cross_entropy(y_hat, y)
print(loss)

tensor(1.4979)
