# Softmax Regression

In [1]:
import torch
import torch.nn.functional as F
# Seed PyTorch's global random generator so the random tensors created in later
# cells are reproducible. The call returns the Generator object, which is echoed
# as this cell's output because it is the last expression.
torch.manual_seed(42)

<torch._C.Generator at 0x2bfa181f0d0>

In [2]:
# Three raw scores to feed into softmax.
# torch.tensor with an explicit dtype replaces the legacy torch.FloatTensor
# constructor; the result is the same float32 tensor.
z = torch.tensor([1, 2, 3], dtype=torch.float32)

In [6]:
# Softmax along the only axis (dim=0) turns the scores in z into probabilities.
# z.softmax(dim=0) is the tensor-method spelling of F.softmax(z, dim=0).
hypothesis = z.softmax(dim=0)
print(hypothesis)
# The softmax outputs add up to 1.
print(torch.sum(hypothesis))

tensor([0.0900, 0.2447, 0.6652])
tensor(1.)


In [55]:
# A 3 x 5 matrix of uniform random scores that tracks gradients.
z = torch.rand((3, 5), requires_grad=True)
# Normalise along dim=1 so that every row becomes a probability distribution.
hypothesis = z.softmax(dim=1)
print(hypothesis)

tensor([[0.1836, 0.1593, 0.1500, 0.2396, 0.2675],
        [0.1922, 0.1806, 0.2034, 0.1960, 0.2278],
        [0.2148, 0.1186, 0.1553, 0.2845, 0.2269]], grad_fn=<SoftmaxBackward0>)


In [56]:
# Three random integer class labels drawn from {0, 1, 2, 3, 4}, stored as int64.
y = torch.randint(low=0, high=5, size=(3,), dtype=torch.long)
print(y)

tensor([4, 2, 0])


In [57]:
# Start from an all-zero tensor with the same 3 x 5 shape as hypothesis.
y_one_hot = torch.zeros_like(hypothesis)
# One-hot encoding: y[:, None] reshapes the labels into a column of indices, and
# scatter_ writes a 1 at that column index in every row, in place.
# scatter_ returns the tensor itself, so the encoded result is shown as the
# cell's output.
y_one_hot.scatter_(1, y[:, None], 1)

tensor([[0., 0., 0., 0., 1.],
        [0., 0., 1., 0., 0.],
        [1., 0., 0., 0., 0.]])

In [58]:
# Cost function written by hand: mask the negative log-probabilities with the
# one-hot labels, add up each row, then average over the rows.
cost = torch.mean(torch.sum(y_one_hot * -hypothesis.log(), dim=1))
print(cost)

tensor(1.4830, grad_fn=<MeanBackward0>)


In [60]:
# Compute the same values with log_softmax from nn.functional:
#   torch.log(F.softmax(z)) == F.log_softmax(z)
hypothesis = F.softmax(z, dim=1)
# Both tensors are printed one after the other for a side-by-side comparison.
print(torch.log(hypothesis), F.log_softmax(z, dim=1), sep="\n")

tensor([[-1.6949, -1.8367, -1.8974, -1.4288, -1.3187],
        [-1.6492, -1.7116, -1.5924, -1.6295, -1.4795],
        [-1.5380, -2.1323, -1.8626, -1.2571, -1.4834]], grad_fn=<LogBackward0>)
tensor([[-1.6949, -1.8367, -1.8974, -1.4288, -1.3187],
        [-1.6492, -1.7116, -1.5924, -1.6295, -1.4795],
        [-1.5380, -2.1323, -1.8626, -1.2571, -1.4834]],
       grad_fn=<LogSoftmaxBackward0>)


In [61]:
# Compute the cost with cross_entropy from nn.functional.
# F.nll_loss(F.log_softmax(z), y) => F.cross_entropy(z, y)
# nll: Negative Log Likelihood
# The four prints below are equivalent ways of getting the same cost.

# 1) softmax -> log -> one-hot mask, all by hand
print(torch.mean(torch.sum(y_one_hot * -torch.log(F.softmax(z, dim=1)), dim=1)))
# 2) log_softmax with the one-hot mask
print(torch.mean(torch.sum(y_one_hot * -F.log_softmax(z, dim=1), dim=1)))
# 3) nll_loss takes the class indices directly, so no one-hot tensor is needed
print(F.nll_loss(F.log_softmax(z, dim=1), target=y))
# 4) cross_entropy takes the raw scores and the class indices
print(F.cross_entropy(z, target=y))

tensor(1.4830, grad_fn=<MeanBackward0>)
tensor(1.4830, grad_fn=<MeanBackward0>)
tensor(1.4830, grad_fn=<NllLossBackward0>)
tensor(1.4830, grad_fn=<NllLossBackward0>)


In [None]:
# Implementation (low level - without F.cross_entropy())
# torch and F were already imported in the first cell; nn and optim are new here.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Re-seed the global random generator with the same value as at the top.
torch.manual_seed(42)

<torch._C.Generator at 0x2bfa181f0d0>

In [69]:
# Training data: 8 samples with 4 features each, plus one class label (0, 1 or 2)
# per sample. The tensors are built directly with torch.tensor and an explicit
# dtype instead of the legacy FloatTensor / LongTensor constructors, so the names
# never hold plain Python lists first and re-running the cell is idempotent.
x_train = torch.tensor(
    [
        [1, 2, 1, 1],
        [2, 1, 3, 2],
        [3, 1, 3, 4],
        [4, 1, 5, 5],
        [1, 7, 5, 5],
        [1, 2, 5, 6],
        [1, 6, 6, 6],
        [1, 7, 7, 7],
    ],
    dtype=torch.float32,
)
# Class indices must be int64 for F.cross_entropy / scatter_.
y_train = torch.tensor([2, 2, 2, 1, 1, 1, 0, 0], dtype=torch.long)
print(x_train.shape, y_train.shape)

torch.Size([8, 4]) torch.Size([8])


In [70]:
# One-hot encode the training labels for the hand-written cross-entropy below.
# The row count is read from y_train instead of being hard-coded as 8, so the
# cell keeps working if the training set changes size; 3 is the number of classes.
y_one_hot = torch.zeros(y_train.size(0), 3)
# scatter_ writes a 1 into the column named by each label, in place.
y_one_hot.scatter_(1, y_train.unsqueeze(1), 1)
print(y_one_hot.shape)

torch.Size([8, 3])


In [74]:
# Model initialisation: both parameters start at zero and are tracked by autograd.
# W has one row per input feature (4) and one column per class (3);
# b holds one bias per class and broadcasts over the rows.
W = torch.zeros(4, 3, requires_grad=True)
b = torch.zeros(1, 3, requires_grad=True)

# Optimizer setup: plain SGD over both parameters.
optimizer = optim.SGD(params=[W, b], lr=0.1)

In [None]:
# The loop runs nb_epochs + 1 gradient steps (epochs 0..nb_epochs inclusive), so
# the final epoch is also reported by the progress print.
nb_epochs = 1000
for epoch in range(nb_epochs + 1):
    # Per-class log-probabilities. F.log_softmax is used instead of
    # torch.log(F.softmax(...)): in the two-step form a probability can underflow
    # to 0, log(0) is -inf, and 0 * -inf in the one-hot product below turns the
    # cost into NaN.
    log_hypothesis = F.log_softmax(x_train.matmul(W) + b, dim=1)
    # Cross-entropy: keep the negative log-probability of the true class only,
    # then average over the samples.
    cost = (y_one_hot * -log_hypothesis).sum(dim=1).mean()

    # Clear the old gradients, backpropagate, and take one SGD step.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Report progress every 100 epochs.
    if epoch % 100 == 0:
        print('Epoch: {:4d}/{}, Cost: {:.6f}'.format(epoch, nb_epochs, cost.item()))

Epoch:    0/1000, Cost: 1.098612
Epoch:  100/1000, Cost: 0.704200
Epoch:  200/1000, Cost: 0.623000
Epoch:  300/1000, Cost: 0.565717
Epoch:  400/1000, Cost: 0.515291
Epoch:  500/1000, Cost: 0.467662
Epoch:  600/1000, Cost: 0.421278
Epoch:  700/1000, Cost: 0.375402
Epoch:  800/1000, Cost: 0.329765
Epoch:  900/1000, Cost: 0.285072
Epoch: 1000/1000, Cost: 0.248155


In [78]:
# Class probabilities of the trained low-level model for every training sample
# (one row per sample, one column per class). `@` is the matmul operator.
print(F.softmax(x_train @ W + b, dim=1))

tensor([[1.1909e-04, 7.5725e-03, 9.9231e-01],
        [3.9348e-03, 1.3411e-01, 8.6196e-01],
        [6.9056e-06, 2.6787e-01, 7.3212e-01],
        [3.1690e-05, 7.8606e-01, 2.1390e-01],
        [3.2070e-01, 6.5953e-01, 1.9770e-02],
        [1.6596e-01, 8.3388e-01, 1.6129e-04],
        [6.3023e-01, 3.6950e-01, 2.7694e-04],
        [8.0781e-01, 1.9218e-01, 1.4840e-05]], grad_fn=<SoftmaxBackward0>)


In [111]:
# Implementation (high level - uses F.cross_entropy())

# Fresh zero-initialised parameters and a new optimizer, so this run starts from
# the same point as the low-level version.
W = torch.zeros(4, 3, requires_grad=True)
b = torch.zeros(1, 3, requires_grad=True)
optimizer = optim.SGD(params=[W, b], lr=0.1)

nb_epochs = 1000
for epoch in range(nb_epochs + 1):
    # Gradients are cleared first; the forward pass does not read them.
    optimizer.zero_grad()

    # Raw class scores; F.cross_entropy applies log-softmax and picks the true
    # class from the integer labels, so no softmax or one-hot tensor is needed.
    z = x_train @ W + b
    cost = F.cross_entropy(z, y_train)

    cost.backward()
    optimizer.step()

    # Only every 100th epoch is reported.
    if epoch % 100 != 0:
        continue
    print('Epoch {:4d}/{} Cost: {:.6f}'.format(epoch, nb_epochs, cost.item()))
    
    

Epoch    0/1000 Cost: 1.098612
Epoch  100/1000 Cost: 0.704199
Epoch  200/1000 Cost: 0.622999
Epoch  300/1000 Cost: 0.565717
Epoch  400/1000 Cost: 0.515291
Epoch  500/1000 Cost: 0.467662
Epoch  600/1000 Cost: 0.421278
Epoch  700/1000 Cost: 0.375401
Epoch  800/1000 Cost: 0.329766
Epoch  900/1000 Cost: 0.285073
Epoch 1000/1000 Cost: 0.248155


In [124]:
# Declare and initialise the model: 4 input features classified into 3 classes
# (input_dim=4, output_dim=3).
torch.manual_seed(42)
# nn.Linear creates its parameters internally: a weight of shape (3, 4), i.e.
# (out_features, in_features), and a bias of shape (3,).
model = nn.Linear(in_features=4, out_features=3)

optimizer = optim.SGD(model.parameters(), lr=0.1)

nb_epochs = 1000
for epoch in range(nb_epochs + 1):
    optimizer.zero_grad()

    prediction = model(x_train)
    # F.cross_entropy works on raw scores and class indices, so there is no need
    # to apply softmax or to build y_one_hot separately.
    cost = F.cross_entropy(prediction, y_train)

    cost.backward()
    optimizer.step()

    # Only every 100th epoch is reported.
    if epoch % 100 != 0:
        continue
    print('Epoch: {:4d}/{}, Cost: {:.6f}'.format(epoch, nb_epochs, cost.item()))

Epoch:    0/1000, Cost: 2.035638
Epoch:  100/1000, Cost: 0.723868
Epoch:  200/1000, Cost: 0.641093
Epoch:  300/1000, Cost: 0.583296
Epoch:  400/1000, Cost: 0.532822
Epoch:  500/1000, Cost: 0.485345
Epoch:  600/1000, Cost: 0.439220
Epoch:  700/1000, Cost: 0.393651
Epoch:  800/1000, Cost: 0.348252
Epoch:  900/1000, Cost: 0.303179
Epoch: 1000/1000, Cost: 0.261096


In [118]:
# The same model, wrapped in an nn.Module subclass.
class SoftmaxClassifierModel(nn.Module):
    """Single linear layer that outputs raw class scores for softmax regression.

    forward() does not apply softmax; it returns the output of the linear
    layer unchanged.

    Args:
        in_features: Number of input features per sample. Defaults to 4.
        out_features: Number of classes (output scores). Defaults to 3.
    """

    def __init__(self, in_features=4, out_features=3):
        super().__init__()
        # The defaults reproduce the previously hard-coded nn.Linear(4, 3).
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Return the linear layer's scores for x, shape (..., out_features)."""
        return self.linear(x)

In [None]:
# Reset the random generator to seed 42 again before the model is created.
torch.manual_seed(42)
model = SoftmaxClassifierModel()
optimizer = optim.SGD(model.parameters(), lr=0.1)

nb_epochs = 1000

for epoch in range(nb_epochs + 1):
    optimizer.zero_grad()

    # The model returns raw scores; F.cross_entropy handles the softmax part.
    prediction = model(x_train)
    cost = F.cross_entropy(prediction, y_train)

    cost.backward()
    optimizer.step()

    # Only every 100th epoch is reported.
    if epoch % 100 != 0:
        continue
    print('Epoch: {:4d}/{}, Cost: {:.6f}'.format(epoch, nb_epochs, cost.item()))

Epoch:    0/1000, Cost: 2.035638
Epoch:  100/1000, Cost: 0.723868
Epoch:  200/1000, Cost: 0.641093
Epoch:  300/1000, Cost: 0.583296
Epoch:  400/1000, Cost: 0.532822
Epoch:  500/1000, Cost: 0.485345
Epoch:  600/1000, Cost: 0.439220
Epoch:  700/1000, Cost: 0.393651
Epoch:  800/1000, Cost: 0.348252
Epoch:  900/1000, Cost: 0.303179
Epoch: 1000/1000, Cost: 0.261096


In [108]:
# Turn the trained model's raw scores into class probabilities, one row per
# training sample. Tensor.softmax(dim=1) is the method form of F.softmax(..., dim=1).
print(model(x_train).softmax(dim=1))

tensor([[1.2506e-04, 7.4764e-03, 9.9240e-01],
        [4.0909e-03, 1.3132e-01, 8.6459e-01],
        [5.3603e-06, 2.6328e-01, 7.3671e-01],
        [2.4481e-05, 7.8217e-01, 2.1780e-01],
        [3.3056e-01, 6.4963e-01, 1.9818e-02],
        [1.7142e-01, 8.2840e-01, 1.7474e-04],
        [6.4769e-01, 3.5203e-01, 2.7602e-04],
        [8.1926e-01, 1.8073e-01, 1.4660e-05]], grad_fn=<SoftmaxBackward0>)
