In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
z = torch.FloatTensor([1, 2, 3])

# softmax

In [5]:
#softmax
hypothesis = F.softmax(z, dim = 0)
print(hypothesis)
print(hypothesis.sum())

tensor([0.0900, 0.2447, 0.6652])
tensor(1.)


# cross entropy

In [6]:
z = torch.rand(3,5,requires_grad=True)
hypothesis = F.softmax(z, dim=1)
print(hypothesis)

tensor([[0.1063, 0.2182, 0.2422, 0.2529, 0.1805],
        [0.2203, 0.1774, 0.2266, 0.1790, 0.1967],
        [0.1497, 0.1979, 0.3255, 0.1442, 0.1827]], grad_fn=<SoftmaxBackward>)


In [7]:
y = torch.randint(5,(3,)).long()#one-hot vector의 인덱스값
print(y)

tensor([0, 2, 0])


In [10]:
y_one_hot = torch.zeros_like(ypothesis)
y_one_hot.scatter_(1,y.unsqueeze(1),1)#1은 dim, y.unsqueeze는 (3,)->(3,1)

tensor([[1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [1., 0., 0., 0., 0.]])

In [11]:
cost = (y_one_hot * -torch.log(hypothesis)).sum(dim=1).mean() #(3,5) ->sum -> (3,1) -> mean -> scalar
print(cost)

tensor(1.8753, grad_fn=<MeanBackward0>)


In [12]:
#low level
torch.log(F.softmax(z, dim=1))
#high level
F.log_softmax(z,dim=1)
#negative log likelihood
F.nll_loss(F.log_softmax(z,dim=1),y) #뒤의 sum과 mean을 생략할 수 있음
#더 간단
F.cross_entropy(z,y)

tensor(1.8753, grad_fn=<NllLossBackward>)

In [13]:
x_train = [[1, 2, 1, 1],
           [2, 1, 3, 2],
           [3, 1, 3, 4],
           [4, 1, 5, 5],
           [1, 7, 5, 5],
           [1, 2, 5, 6],
           [1, 6, 6, 6],
           [1, 7, 7, 7]]
y_train = [2, 2, 2, 1, 1, 1, 0, 0]
x_train = torch.FloatTensor(x_train)

y_train = torch.LongTensor(y_train) #one-hot vector로 바꿀거니까

In [15]:
# samples = 4, classses = 3, dim = 4
#모델 초기화
W = torch.zeros((4,3), requires_grad = True)
b = torch.zeros(1, requires_grad=True)

#optimizer
optimizer = optim.SGD([W,b],lr = 0.1)

nb_epochs = 1000

for epoch in range(nb_epochs + 1):
  z = x_train.matmul(W) + b
  cost = F.cross_entropy(z, y_train) #multicalss loss : cross_entropy

  optimizer.zero_grad()
  cost.backward()
  optimizer.step()

  if epoch % 100 == 0:
    print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, nb_epochs, cost.item()
        ))

Epoch    0/1000 Cost: 1.098612
Epoch  100/1000 Cost: 0.761050
Epoch  200/1000 Cost: 0.689991
Epoch  300/1000 Cost: 0.643229
Epoch  400/1000 Cost: 0.604117
Epoch  500/1000 Cost: 0.568256
Epoch  600/1000 Cost: 0.533922
Epoch  700/1000 Cost: 0.500291
Epoch  800/1000 Cost: 0.466908
Epoch  900/1000 Cost: 0.433507
Epoch 1000/1000 Cost: 0.399962


Implementation with nn.Module

In [16]:
#with nn.Module
class SoftmaxClassifierModel(nn.Module):
  def __init__(self):
    super().__init__()
    self.linear = nn.Linear(4,3)

  def forward(self,x):
    return self.linear(x)

In [17]:
model = SoftmaxClassifierModel()

In [21]:
optimizer = optim.SGD(model.parameters(), lr = 0.1)
nb_epochs = 1000

for epoch in range(nb_epochs + 1):
  prediction = model(x_train)
  cost = F.cross_entropy(prediction, y_train)

  optimizer.zero_grad()
  cost.backward()
  optimizer.step()

  if epoch % 100 == 0:
    print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, nb_epochs, cost.item()
        ))

Epoch    0/1000 Cost: 0.256093
Epoch  100/1000 Cost: 0.235759
Epoch  200/1000 Cost: 0.224217
Epoch  300/1000 Cost: 0.213733
Epoch  400/1000 Cost: 0.204149
Epoch  500/1000 Cost: 0.195354
Epoch  600/1000 Cost: 0.187255
Epoch  700/1000 Cost: 0.179775
Epoch  800/1000 Cost: 0.172846
Epoch  900/1000 Cost: 0.166412
Epoch 1000/1000 Cost: 0.160421
