In [2]:
import os

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [4]:
# Fix PyTorch's global RNG seed so the random tensors created below are reproducible.
torch.manual_seed(seed=1)

<torch._C.Generator at 0x12ffae69fd0>

### Cross-entropy Loss with torch.nn.functional

In [6]:
# Random logits for 3 samples over 5 classes, plus their row-wise softmax probabilities.
z = torch.rand((3, 5), requires_grad=True)
hypothesis = torch.softmax(z, dim=1)

# One random integer class label in [0, 5) per sample.
y = torch.randint(0, 5, (3,)).long()

# One-hot encode the labels in place: write a 1 at column y[i] of row i.
y_one_hot = torch.zeros_like(hypothesis)
y_one_hot.scatter_(1, y.view(-1, 1), 1)

tensor([[0., 0., 0., 1., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.]])

In [7]:
# Not recommended: applying log after softmax as two separate ops is slower and
# numerically less stable than the fused F.log_softmax.
F.softmax(z, dim=1).log()

tensor([[-1.2127, -1.4941, -1.8359, -1.9517, -1.7335],
        [-1.8912, -1.8358, -1.6516, -1.3775, -1.4038],
        [-2.3011, -1.5023, -1.6726, -1.3380, -1.4823]], grad_fn=<LogBackward>)

In [8]:
# Recommended: fused log-softmax over the class dimension (same values as log(softmax(z))).
F.log_softmax(input=z, dim=1)

tensor([[-1.2127, -1.4941, -1.8359, -1.9517, -1.7335],
        [-1.8912, -1.8358, -1.6516, -1.3775, -1.4038],
        [-2.3011, -1.5023, -1.6726, -1.3380, -1.4823]],
       grad_fn=<LogSoftmaxBackward>)

In [9]:
# Manual cross-entropy: the one-hot mask keeps only -log p(true class) in each row;
# sum over classes gives the per-sample loss, then average over the batch.
torch.mean(torch.sum(y_one_hot * -torch.log(F.softmax(z, dim=1)), dim=1))

tensor(1.6471, grad_fn=<MeanBackward0>)

In [10]:
# Recommended: negative log-likelihood on log-probabilities; takes integer class
# indices directly, so no one-hot matrix is needed.
F.nll_loss(input=F.log_softmax(z, dim=1), target=y.long())

tensor(1.6471, grad_fn=<NllLossBackward>)

In [11]:
# F.cross_entropy combines F.log_softmax() and F.nll_loss(); it takes the raw logits.
F.cross_entropy(input=z, target=y)

tensor(1.6471, grad_fn=<NllLossBackward>)

In [13]:
# Location of the zoo dataset CSV. Defaults to the original path; set the
# ZOO_CSV_PATH environment variable to load it from elsewhere without editing
# the notebook (the hard-coded absolute path only exists on one machine).
zoo_csv_path = os.environ.get("ZOO_CSV_PATH", "D:/torch/data-04-zoo.csv")

# Comma-separated numeric table, parsed as float32.
xy = np.loadtxt(zoo_csv_path, delimiter=",", dtype=np.float32)

In [14]:
# Every column except the last is an input feature (kept as float32).
x_train = torch.FloatTensor(xy[:, :-1])
# The last column is the class index; convert it to int64, the target dtype
# that F.cross_entropy is given later in this notebook.
y_train = torch.squeeze(torch.LongTensor(xy[:, -1]))

In [15]:
# Sanity-check the features: tensor shape, number of samples, and the first five rows.
print(x_train.shape, len(x_train), x_train[:5], sep="\n")

torch.Size([101, 16])
101
tensor([[1., 0., 0., 1., 0., 0., 1., 1., 1., 1., 0., 0., 4., 0., 0., 1.],
        [1., 0., 0., 1., 0., 0., 0., 1., 1., 1., 0., 0., 4., 1., 0., 1.],
        [0., 0., 1., 0., 0., 1., 1., 1., 1., 0., 0., 1., 0., 1., 0., 0.],
        [1., 0., 0., 1., 0., 0., 1., 1., 1., 1., 0., 0., 4., 0., 0., 1.],
        [1., 0., 0., 1., 0., 0., 1., 1., 1., 1., 0., 0., 4., 1., 0., 1.]])


In [16]:
# Sanity-check the labels: tensor shape, number of samples, and the first five class indices.
print(y_train.shape, len(y_train), y_train[:5], sep="\n")

torch.Size([101])
101
tensor([0, 0, 3, 0, 0])


In [17]:
nb_classes = 7

# One-hot encode the training labels: start from an all-zero
# (num_samples, nb_classes) matrix and write a 1 at column y_train[i] of each
# row i, using the out-of-place scatter.
y_one_hot = torch.zeros(len(y_train), nb_classes).scatter(1, y_train[:, None], 1)

### Training with F.cross_entropy

In [19]:
# Softmax-regression parameters: 16 input features -> 7 class logits.
W = torch.zeros((16, 7), requires_grad=True)
# NOTE: a single scalar bias shifts every class logit by the same amount, which
# softmax is invariant to; use shape (7,) if a per-class bias (as in nn.Linear)
# is wanted.
b = torch.zeros(1, requires_grad=True)
optimizer = optim.SGD([W, b], lr=0.1)
nb_epochs = 1000

# range(nb_epochs + 1) so that the final epoch (nb_epochs) is logged as well.
for epoch in range(nb_epochs + 1):
    # Logits. Fixed: this used to add the literal 1 instead of the bias `b`, so
    # `b` never entered the graph even though it was handed to the optimizer.
    z = x_train.matmul(W) + b
    # F.cross_entropy applies log-softmax internally and takes integer class targets.
    cost = F.cross_entropy(z, y_train)

    # Standard step: clear stale gradients, backpropagate, update parameters.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if epoch % 100 == 0:
        print("Epochs {:4d}/{} cost:{:.6f}".format(epoch, nb_epochs, cost.item()))

Epochs    0/1000 cost:1.945909
Epochs  100/1000 cost:0.471836
Epochs  200/1000 cost:0.326327
Epochs  300/1000 cost:0.257839
Epochs  400/1000 cost:0.215762
Epochs  500/1000 cost:0.186603
Epochs  600/1000 cost:0.164898
Epochs  700/1000 cost:0.147955
Epochs  800/1000 cost:0.134279
Epochs  900/1000 cost:0.122962
Epochs 1000/1000 cost:0.113422


### High-level Implementation with nn.Module

In [22]:
class SoftmaxClassificationModel(nn.Module):
    """Linear softmax classifier: one affine layer that maps features to class logits.

    The model returns raw logits, not probabilities; pair it with
    ``F.cross_entropy``, which applies log-softmax internally.

    Args:
        in_features: Number of input features per sample (default 16).
        out_features: Number of classes, i.e. logits per sample (default 7).
    """

    def __init__(self, in_features=16, out_features=7):
        super().__init__()
        # Defaults reproduce the original fixed 16 -> 7 layer.
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Return the class logits for a batch ``x`` of shape (N, in_features)."""
        return self.linear(x)

In [23]:
# Instantiate the nn.Module-based softmax classifier defined above (16 features -> 7 class logits).
model = SoftmaxClassificationModel()

In [25]:
# Plain SGD over every parameter registered on the model (the linear layer's weight and bias).
optimizer = optim.SGD(model.parameters(), lr=0.1)
nb_epochs = 1000

# One full-batch gradient step per epoch; epoch nb_epochs is included so the last state is logged.
for epoch in range(nb_epochs + 1):
    # Clear gradients left over from the previous step before the new backward pass.
    optimizer.zero_grad()

    # Forward pass: the model returns logits, and F.cross_entropy handles the softmax.
    prediction = model(x_train)
    cost = F.cross_entropy(prediction, y_train)

    # Backward pass and parameter update.
    cost.backward()
    optimizer.step()

    # Report the cost every 100th epoch.
    if not epoch % 100:
        print('Epoch {:4d}/{} cost: {:.6f}'.format(epoch, nb_epochs, cost.item()))

Epoch    0/1000 cost: 2.297726
Epoch  100/1000 cost: 0.497828
Epoch  200/1000 cost: 0.337389
Epoch  300/1000 cost: 0.261103
Epoch  400/1000 cost: 0.214214
Epoch  500/1000 cost: 0.181953
Epoch  600/1000 cost: 0.158229
Epoch  700/1000 cost: 0.139989
Epoch  800/1000 cost: 0.125509
Epoch  900/1000 cost: 0.113731
Epoch 1000/1000 cost: 0.103965
