# Lab-06 Softmax Classification

In [1]:
import torch
from torch import optim
import torch.nn as nn
import torch.nn.functional as F

In [2]:
torch.manual_seed(1)

<torch._C.Generator at 0x2a6d65e22a0>

## Softmax

In [3]:
z = torch.FloatTensor([1, 2, 3])

In [4]:
output = F.softmax(z, dim=0)
output

tensor([0.0900, 0.2447, 0.6652])

## Cross Entropy Loss (Low-level)

In [5]:
z = torch.rand(3, 5, requires_grad=True)
output = F.softmax(z, dim=1)
output

tensor([[0.2645, 0.1639, 0.1855, 0.2585, 0.1277],
        [0.2430, 0.1624, 0.2322, 0.1930, 0.1694],
        [0.2226, 0.1986, 0.2326, 0.1594, 0.1868]], grad_fn=<SoftmaxBackward>)

In [6]:
y = torch.randint(5, (3,)).long()
y

tensor([0, 2, 1])

In [7]:
y_one_hot = torch.zeros_like(output)
y_one_hot.scatter_(1, y.unsqueeze(1), 1)

tensor([[1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 1., 0., 0., 0.]])

In [8]:
(y_one_hot * -torch.log(output))

tensor([[1.3301, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 1.4602, 0.0000, 0.0000],
        [0.0000, 1.6165, 0.0000, 0.0000, 0.0000]], grad_fn=<MulBackward0>)

In [9]:
cost = (y_one_hot * -torch.log(output)).sum(dim=1).mean()
cost

tensor(1.4689, grad_fn=<MeanBackward0>)

## Cross Entropy Loss with F

In [10]:
F.log_softmax(z, dim=1)

tensor([[-1.3301, -1.8084, -1.6846, -1.3530, -2.0584],
        [-1.4147, -1.8174, -1.4602, -1.6450, -1.7758],
        [-1.5025, -1.6165, -1.4586, -1.8360, -1.6776]],
       grad_fn=<LogSoftmaxBackward>)

In [11]:
(y_one_hot * -F.log_softmax(z, dim=1)).sum(dim=1).mean()

tensor(1.4689, grad_fn=<MeanBackward0>)

In [12]:
F.nll_loss(F.log_softmax(z, dim=1), y)

tensor(1.4689, grad_fn=<NllLossBackward>)

In [13]:
F.cross_entropy(z, y)

tensor(1.4689, grad_fn=<NllLossBackward>)

# Training with Cross Entropy Loss

In [14]:
x_train = [[1, 2, 1, 1], 
           [2, 1, 3, 2], 
           [3, 1, 3, 4], 
           [4, 1, 5, 5], 
           [1, 2, 5, 5], 
           [1, 2, 5, 6], 
           [1, 6, 6, 6], 
           [1, 7, 7, 7]]
y_train = [2, 2, 2, 1, 1, 1, 0, 0]
x_train = torch.FloatTensor(x_train)
y_train = torch.LongTensor(y_train)

In [15]:
W = torch.randn((x_train.size(1), 3), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

In [16]:
optimizer = optim.Adam([W, b], lr=0.1)

In [17]:
n_epoch = 1000
for epoch in range(n_epoch):
    output = F.softmax(x_train.matmul(W) + b, dim=1)
    y_one_hot = torch.zeros_like(output)
    y_one_hot.scatter_(1, y_train.unsqueeze(1), 1)
    loss = (y_one_hot * -torch.log(F.softmax(output, dim=1))).sum(dim=1).mean()
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, n_epoch, loss.item()
        ))

Epoch    0/1000 Cost: 1.297004
Epoch  100/1000 Cost: 0.603911
Epoch  200/1000 Cost: 0.575870
Epoch  300/1000 Cost: 0.565488
Epoch  400/1000 Cost: 0.560671
Epoch  500/1000 Cost: 0.558033
Epoch  600/1000 Cost: 0.556418
Epoch  700/1000 Cost: 0.555349
Epoch  800/1000 Cost: 0.554601
Epoch  900/1000 Cost: 0.554054


In [18]:
output

tensor([[1.1478e-06, 9.8378e-14, 1.0000e+00],
        [1.8411e-04, 7.8985e-03, 9.9192e-01],
        [8.1140e-06, 8.5545e-03, 9.9144e-01],
        [8.8808e-08, 9.9023e-01, 9.7742e-03],
        [5.1221e-04, 9.9949e-01, 2.2243e-12],
        [7.2672e-05, 9.9993e-01, 3.8230e-15],
        [9.9948e-01, 1.5099e-13, 5.1911e-04],
        [9.9998e-01, 1.2399e-15, 2.0101e-05]], grad_fn=<SoftmaxBackward>)

In [19]:
output.round()

tensor([[0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [1., 0., 0.],
        [1., 0., 0.]], grad_fn=<RoundBackward>)

## with F.cross_entropy()

In [20]:
W = torch.randn((x_train.size(1), 3), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

optimizer = optim.SGD([W, b], lr=0.1)

n_epoch = 1000
for epoch in range(n_epoch):
    output = F.softmax(x_train.matmul(W) + b, dim=1)
    loss = F.cross_entropy(output, y_train)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, n_epoch, loss.item()
        ))

Epoch    0/1000 Cost: 1.214819
Epoch  100/1000 Cost: 1.201358
Epoch  200/1000 Cost: 1.195031
Epoch  300/1000 Cost: 1.191031
Epoch  400/1000 Cost: 1.188324
Epoch  500/1000 Cost: 1.186386
Epoch  600/1000 Cost: 1.184932
Epoch  700/1000 Cost: 1.183797
Epoch  800/1000 Cost: 1.182879
Epoch  900/1000 Cost: 1.182111


In [21]:
output

tensor([[5.0742e-02, 5.1873e-03, 9.4407e-01],
        [9.9791e-01, 2.4918e-06, 2.0869e-03],
        [9.9715e-01, 5.4740e-09, 2.8468e-03],
        [9.9999e-01, 9.5403e-13, 5.6036e-06],
        [9.9999e-01, 4.3170e-10, 5.3498e-06],
        [1.0000e+00, 4.0441e-11, 3.8508e-06],
        [9.8861e-01, 8.8158e-09, 1.1388e-02],
        [9.9536e-01, 5.0908e-10, 4.6367e-03]], grad_fn=<SoftmaxBackward>)

In [22]:
output.round()

tensor([[0., 0., 1.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.]], grad_fn=<RoundBackward>)

## Cross Entropy Loss (High-level)

In [23]:
class SoftmaxClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(x_train.size(1), 3)
        self.softmax = nn.Softmax(dim=1)
    
    def forward(self, x):
        return self.softmax(self.linear(x))

In [24]:
model = SoftmaxClassifier()
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [25]:
n_epoch = 1000
for epoch in range(n_epoch):
    output = model(x_train)
    loss = F.cross_entropy(output, y_train)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, n_epoch, loss.item()
        ))

Epoch    0/1000 Cost: 1.153989
Epoch  100/1000 Cost: 0.926352
Epoch  200/1000 Cost: 0.890353
Epoch  300/1000 Cost: 0.872935
Epoch  400/1000 Cost: 0.861319
Epoch  500/1000 Cost: 0.852568
Epoch  600/1000 Cost: 0.845580
Epoch  700/1000 Cost: 0.839817
Epoch  800/1000 Cost: 0.834963
Epoch  900/1000 Cost: 0.830817


In [26]:
output

tensor([[1.3929e-03, 8.2274e-04, 9.9778e-01],
        [1.3058e-03, 1.5006e-01, 8.4864e-01],
        [2.2409e-04, 1.9252e-01, 8.0726e-01],
        [1.3714e-05, 7.9648e-01, 2.0351e-01],
        [4.1641e-06, 9.9831e-01, 1.6866e-03],
        [1.0095e-06, 9.9959e-01, 4.1048e-04],
        [4.2096e-06, 3.7146e-01, 6.2854e-01],
        [7.3428e-07, 5.6831e-01, 4.3169e-01]], grad_fn=<SoftmaxBackward>)

In [27]:
output.round()

tensor([[0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 0., 1.],
        [0., 1., 0.]], grad_fn=<RoundBackward>)