# Softmax Classificaton

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [4]:
torch.manual_seed(1)

<torch._C.Generator at 0x214ad8b5730>

## Discrete Probability Distribution
- 이산적인 확률 분포

## 확률분포에 대한 근사 - Softmax
- ![image](https://user-images.githubusercontent.com/28910538/63824583-a01a5800-c992-11e9-8c95-53a5b42ef7bc.png)

- max값을 뽑는데 부드럽게 뽑아줌
    - 합쳐서 1이 되는 값으로

In [2]:
z = torch.FloatTensor([1, 2, 3])
# argmax -> (0, 0, 1)

In [3]:
hypothesis = F.softmax(z, dim=0)
# 확률 값으로 볼 수 있음 총합은 1
print(hypothesis)

tensor([0.0900, 0.2447, 0.6652])


In [4]:
hypothesis.sum()

tensor(1.)

## Cross Entropy
- ![image](https://user-images.githubusercontent.com/28910538/63824623-c213da80-c992-11e9-9f0c-e56a5500940a.png)
- 두 개의 확률분포가 주어졌을때 얼마나 비슷한지
- 엔트로피는 어떤 불확실성 값에 대한 척도
- 이런 엔트로피 통해 실 값과 예측 값에 대한 곱을 통해 두개의 확률 분포가 얼마나 비슷한지를 비교할 수 있고
- 이러한 값이 최소화되도록 학습

### Cross Entropy Loss(Low-level)
![image](https://user-images.githubusercontent.com/28910538/63824410-f6d36200-c991-11e9-8d59-c324fe1129da.png)


In [5]:
z = torch.rand(3, 5, requires_grad=True)
hypothesis = F.softmax(z, dim=1)
print(hypothesis)

tensor([[0.1900, 0.2319, 0.1614, 0.1556, 0.2611],
        [0.2784, 0.1507, 0.1739, 0.2453, 0.1517],
        [0.2223, 0.2053, 0.1560, 0.2443, 0.1721]], grad_fn=<SoftmaxBackward>)


In [7]:
# 정답을 랜덤하게 가정했을 때, classes : 5 samples : 3
y = torch.randint(5, (3,)).long()
print(y)

tensor([2, 0, 1])


In [10]:
# one-hot vector로 표현
y_one_hot = torch.zeros_like(hypothesis)
y_one_hot.scatter_(1, y.unsqueeze(1), 1)

tensor([[0., 0., 1., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.]])

In [11]:
# (3,5) -> (3,1) -> scalar
cost = (y_one_hot * -torch.log(hypothesis)).sum(dim=1).mean()
print(cost)

tensor(1.5620, grad_fn=<MeanBackward0>)


### Cross-entropy Loss with torch.nn.functional

In [12]:
# Low level
torch.log(F.softmax(z, dim=1))

tensor([[-1.6608, -1.4615, -1.8239, -1.8605, -1.3427],
        [-1.2788, -1.8923, -1.7491, -1.4052, -1.8861],
        [-1.5038, -1.5833, -1.8577, -1.4092, -1.7599]], grad_fn=<LogBackward>)

In [13]:
# High level
F.log_softmax(z, dim=1)

tensor([[-1.6608, -1.4615, -1.8239, -1.8605, -1.3427],
        [-1.2788, -1.8923, -1.7491, -1.4052, -1.8861],
        [-1.5038, -1.5833, -1.8577, -1.4092, -1.7599]],
       grad_fn=<LogSoftmaxBackward>)

In [14]:
# Low level
(y_one_hot * -torch.log(F.softmax(z, dim=1))).sum(dim=1).mean()

tensor(1.5620, grad_fn=<MeanBackward0>)

In [15]:
# High Level
# NLL = Negative Log Likelyhood
F.nll_loss(F.log_softmax(z, dim=1), y)

tensor(1.5620, grad_fn=<NllLossBackward>)

In [16]:
F.cross_entropy(z, y)

tensor(1.5620, grad_fn=<NllLossBackward>)

### Training with Low-level Cross Entropy

In [17]:
x_train = [[1,2,1,1],
           [2,1,3,2],
           [3,1,3,4],
           [4,1,5,5],
           [1,7,5,5],
           [1,2,5,6],
           [1,6,6,6],
           [1,7,7,7]
          ]
y_train = [2,2,2,1,1,1,0,0]
x_train = torch.FloatTensor(x_train)
y_train = torch.LongTensor(y_train)

In [26]:
# samples = m
# classes = 3
# dim = 4

# model init
W = torch.zeros((4,3), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
# optimizer
optimizer = optim.SGD([W,b], lr=0.1)

nb_epochs = 1000
for epoch in range(nb_epochs + 1):
    # Cost 계산
    hypothesis = F.softmax(x_train.matmul(W) + b, dim=1)
    y_one_hot = torch.zeros_like(hypothesis)
    y_one_hot.scatter_(1, y_train.unsqueeze(1), 1)
    cost = (y_one_hot * -torch.log(F.softmax(hypothesis, dim=1))).sum()
    # cost로 H(X) 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print(f'Epoch : {epoch}/{nb_epochs} Cost : {cost.item()}')

Epoch : 0/1000 Cost : 8.788898468017578
Epoch : 100/1000 Cost : 7.378445625305176
Epoch : 200/1000 Cost : 7.205133438110352
Epoch : 300/1000 Cost : 7.114597797393799
Epoch : 400/1000 Cost : 7.053747177124023
Epoch : 500/1000 Cost : 6.994445323944092
Epoch : 600/1000 Cost : 6.897322654724121
Epoch : 700/1000 Cost : 6.746909141540527
Epoch : 800/1000 Cost : 6.700010776519775
Epoch : 900/1000 Cost : 6.672955513000488
Epoch : 1000/1000 Cost : 6.650246620178223


### Training with F.cross_entropy

In [28]:
# samples = m
# classes = 3
# dim = 4

# model init
W = torch.zeros((4,3), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
# optimizer
optimizer = optim.SGD([W,b], lr=0.1)

nb_epochs = 1000
for epoch in range(nb_epochs + 1):
    # Cost 계산
    #hypothesis = F.softmax(x_train.matmul(W) + b, dim=1)
    #y_one_hot = torch.zeros_like(hypothesis)
    #y_one_hot.scatter_(1, y_train.unsqueeze(1), 1)
    z = x_train.matmul(W) + b
    # cost = (y_one_hot * -torch.log(F.softmax(hypothesis, dim=1))).sum()
    cost = F.cross_entropy(z, y_train)
    # cost로 H(X) 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print(f'Epoch : {epoch}/{nb_epochs} Cost : {cost.item()}')

Epoch : 0/1000 Cost : 1.0986123085021973
Epoch : 100/1000 Cost : 0.7610501050949097
Epoch : 200/1000 Cost : 0.6899910569190979
Epoch : 300/1000 Cost : 0.6432290077209473
Epoch : 400/1000 Cost : 0.6041170358657837
Epoch : 500/1000 Cost : 0.5682553052902222
Epoch : 600/1000 Cost : 0.5339219570159912
Epoch : 700/1000 Cost : 0.5002909898757935
Epoch : 800/1000 Cost : 0.4669080972671509
Epoch : 900/1000 Cost : 0.43350696563720703
Epoch : 1000/1000 Cost : 0.39996233582496643


### High-level Implementation with nn.Module

In [29]:
class SoftmaxClassifierModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4, 3)
    
    def forward(self, x):
        return self.linear(x)

In [30]:
model = SoftmaxClassifierModel()

In [32]:
# optimizer
optimizer = optim.SGD(model.parameters(), lr=0.1)

nb_epochs = 1000
for epoch in range(nb_epochs + 1):
    # z = x_train.matmul(W) + b
    # H(X) 게산
    prediction = model(x_train)
    
    # cost = (y_one_hot * -torch.log(F.softmax(hypothesis, dim=1))).sum()
    cost = F.cross_entropy(prediction, y_train)
    # cost로 H(X) 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print(f'Epoch : {epoch}/{nb_epochs} Cost : {cost.item()}')

Epoch : 0/1000 Cost : 4.454987525939941
Epoch : 100/1000 Cost : 0.6869468688964844
Epoch : 200/1000 Cost : 0.5918329358100891
Epoch : 300/1000 Cost : 0.5335536003112793
Epoch : 400/1000 Cost : 0.48711204528808594
Epoch : 500/1000 Cost : 0.4464019536972046
Epoch : 600/1000 Cost : 0.40887588262557983
Epoch : 700/1000 Cost : 0.3729875087738037
Epoch : 800/1000 Cost : 0.33753448724746704
Epoch : 900/1000 Cost : 0.301565021276474
Epoch : 1000/1000 Cost : 0.2653934359550476
