In [1]:
import torch
import torch.nn.functional as F
import torch.nn as nn

In [2]:
z = torch.FloatTensor([1,2,3])

Pytorch has a softmax function.

In [3]:
hypothesis = F.softmax(z, dim=0)
print(hypothesis)

tensor([0.0900, 0.2447, 0.6652])


In [4]:
hypothesis.sum()

tensor(1.)

In [5]:
z = torch.rand(3,5, requires_grad = True)
hypothesis = F.softmax(z, dim=1)
print(hypothesis)

tensor([[0.1452, 0.1372, 0.3195, 0.2089, 0.1893],
        [0.1394, 0.1848, 0.2237, 0.1434, 0.3087],
        [0.2025, 0.3293, 0.1602, 0.1747, 0.1333]], grad_fn=<SoftmaxBackward>)


In [6]:
# 정답 y 
y = torch.randint(5, (3,))
print(y)

tensor([0, 2, 1])


We can change hypothesis to binary prediction by comparing them to 0.5

In [7]:
# hypothesis 와 같은 사이즈의  빈 배열을 만든다
y_one_hot = torch.zeros_like(hypothesis)
y_one_hot.scatter_(1, y.unsqueeze(1), 1 )

tensor([[1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 1., 0., 0., 0.]])

In [8]:
cost = (y_one_hot * -torch.log(hypothesis)).sum(dim=1).mean()
print(cost)

tensor(1.5128, grad_fn=<MeanBackward0>)


Training with low-level Cross Entroypy Loss

In [9]:
x_train = [[1,2,1,1],
          [2,1,3,2],
          [3,1,3,4],
          [4,1,5,5],
          [1,7,5,5],
          [1,2,5,6],
          [1,6,6,6],
          [1,7,7,7]]
y_train = [2,2,2,1,1,1,0,0]
x_train = torch.FloatTensor(x_train)
y_train = torch.LongTensor(y_train)

In [13]:
# 모델 초기화 
W = torch.zeros((4,3), requires_grad = True)
b = torch.zeros(1, requires_grad = True)

# optimizer 설정 
optimizer = torch.optim.SGD([W,b], lr=1)

nb_epochs = 1000
for epoch in range(nb_epochs) : 
    
    # Cost 계산
    hypothesis = torch.sigmoid(x_train.matmul(W) + b)
    cost = F.cross_entropy(hypothesis, y_train)
    
    # prameter update
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    # 100번마다 로그 출력 
    if epoch % 100 == 0 : 
        print(f'Epoch {epoch}/{nb_epochs} , Cost : {cost.item()}')


Epoch 0/1000 , Cost : 1.0986123085021973
Epoch 100/1000 , Cost : 0.8129028081893921
Epoch 200/1000 , Cost : 0.780288815498352
Epoch 300/1000 , Cost : 0.7650819420814514
Epoch 400/1000 , Cost : 0.7557728886604309
Epoch 500/1000 , Cost : 0.7492364048957825
Epoch 600/1000 , Cost : 0.7443036437034607
Epoch 700/1000 , Cost : 0.7404119968414307
Epoch 800/1000 , Cost : 0.7372464537620544
Epoch 900/1000 , Cost : 0.7346133589744568


In [26]:
import torch
import torch.nn.functional as F
import torch.nn as nn

In [16]:
class SoftmaxClassifierModel(nn.Module) : 
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4,3)
        
    def forward(self,x):
        return self.linear(x)

In [18]:
# 모델 초기화 
model = SoftmaxClassifierModel()
# optimizer 설정 
optimizer = torch.optim.SGD(model.parameters(), lr=1)

nb_epochs = 1000
for epoch in range(nb_epochs) :  
    # Cost 계산
    hypothesis = model(x_train)
    cost = F.cross_entropy(hypothesis, y_train)
    
    # prameter update
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    # 100번마다 로그 출력 
    if epoch % 100 == 0 : 
        print(f'Epoch {epoch}/{nb_epochs} , Cost : {cost.item()}')

Epoch 0/1000 , Cost : 1.6002955436706543
Epoch 100/1000 , Cost : 8.659186363220215
Epoch 200/1000 , Cost : 1.7698242664337158
Epoch 300/1000 , Cost : 1.6256897449493408
Epoch 400/1000 , Cost : 1.8773658275604248
Epoch 500/1000 , Cost : 2.0513205528259277
Epoch 600/1000 , Cost : 3.097203254699707
Epoch 700/1000 , Cost : 1.2486099004745483
Epoch 800/1000 , Cost : 0.8728685975074768
Epoch 900/1000 , Cost : 0.0035334948915988207


In [19]:
hypothesis # 예측값 

tensor([[-33.7473,   2.1603,  34.0113],
        [-19.8732,   6.8174,  14.5250],
        [-62.3554,  29.1691,  37.2332],
        [-50.4478,  32.5872,  22.0757],
        [  2.4817,   7.1825,  -0.5795],
        [  1.8612,  21.5579, -18.9747],
        [ 15.9636,  11.2257, -18.9673],
        [ 25.5124,  12.7910, -28.6972]], grad_fn=<AddmmBackward>)

In [22]:
F.softmax(hypothesis, dim=1)

tensor([[3.7395e-30, 1.4699e-14, 1.0000e+00],
        [1.1505e-15, 4.4923e-04, 9.9955e-01],
        [5.6052e-44, 3.1454e-04, 9.9969e-01],
        [8.6765e-37, 9.9997e-01, 2.7220e-05],
        [9.0027e-03, 9.9058e-01, 4.2158e-04],
        [2.7913e-09, 1.0000e+00, 2.4940e-18],
        [9.9132e-01, 8.6812e-03, 6.6979e-16],
        [1.0000e+00, 2.9863e-06, 2.8644e-24]], grad_fn=<SoftmaxBackward>)

In [27]:
F.softmax(hypothesis, dim=1).sum(dim=1)

tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
       grad_fn=<SumBackward1>)