In [21]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

<torch._C.Generator at 0x2396330c430>

In [22]:
z = torch.FloatTensor([1,2,3])
h = F.softmax(z, dim=0)
print(h)
print(h.sum())

tensor([0.0900, 0.2447, 0.6652])
tensor(1.)


In [23]:
y = torch.randint(5,(3,)).long() # randint(5,(3,)) : 0~5보다 작은 정수를 3행 1열의 행렬을 발생
print(y)


tensor([0, 4, 4])


In [24]:
z = torch.rand(3,5, requires_grad=True)
print(z)
h = F.softmax(z, dim=1)
print(h)

tensor([[0.7347, 0.0293, 0.7999, 0.3971, 0.7544],
        [0.5695, 0.4388, 0.6387, 0.5247, 0.6826],
        [0.3051, 0.4635, 0.4550, 0.5725, 0.4980]], requires_grad=True)
tensor([[0.2328, 0.1150, 0.2485, 0.1661, 0.2375],
        [0.1990, 0.1746, 0.2133, 0.1903, 0.2228],
        [0.1709, 0.2002, 0.1985, 0.2232, 0.2072]], grad_fn=<SoftmaxBackward0>)


In [25]:
y_one_hot = torch.zeros_like(h)
print(y_one_hot)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])


In [26]:
print(y.unsqueeze(1)) # unsqueeze(1) : 1차원 행렬을 포함하는 2차원 행렬로 만듬
y_one_hot.scatter_(1, y.unsqueeze(1), 1) # scatter : dim, index, source 순으로 들어감 / dim(방향을의미) : 1일 경우 행을 의미
print(y_one_hot)

tensor([[0],
        [4],
        [4]])
tensor([[1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.]])


In [27]:
cost = (y_one_hot* - torch.log(h)).sum(dim=1).mean()
print(cost)

tensor(1.5109, grad_fn=<MeanBackward0>)


In [28]:
torch.log(F.softmax(z, dim=1))

tensor([[-1.4574, -2.1628, -1.3922, -1.7949, -1.4377],
        [-1.6144, -1.7452, -1.5452, -1.6593, -1.5013],
        [-1.7669, -1.6085, -1.6170, -1.4995, -1.5740]], grad_fn=<LogBackward0>)

In [29]:
torch.log_softmax(z, dim=1)

tensor([[-1.4574, -2.1628, -1.3922, -1.7949, -1.4377],
        [-1.6144, -1.7452, -1.5452, -1.6593, -1.5013],
        [-1.7669, -1.6085, -1.6170, -1.4995, -1.5740]],
       grad_fn=<LogSoftmaxBackward0>)

In [30]:
(y_one_hot * -torch.log(F.softmax(z, dim=1))).sum(dim=1).mean()

tensor(1.5109, grad_fn=<MeanBackward0>)

In [31]:
(y_one_hot * -F.log_softmax(z, dim=1)).sum(dim=1).mean()

tensor(1.5109, grad_fn=<MeanBackward0>)

In [32]:
F.nll_loss(F.log_softmax(z, dim=1), y) # nll : Negative log Likelihood 약자

tensor(1.5109, grad_fn=<NllLossBackward0>)

In [33]:
F.cross_entropy(z, y) # F.log_softmax()와 F.nll_loss()를 포함

tensor(1.5109, grad_fn=<NllLossBackward0>)

In [34]:
x_train = [[1,2,1,1],[2,1,3,2],[3,1,3,4],[4,1,5,5],[1,7,5,5],[1,2,5,6],[1,6,6,6],[1,7,7,7]]
y_train = [2,2,2,1,1,1,0,0]

In [35]:
x_train = torch.FloatTensor(x_train)
y_train = torch.LongTensor(y_train)
print(x_train)
print(x_train.shape)
print(y_train)
print(y_train.size())

tensor([[1., 2., 1., 1.],
        [2., 1., 3., 2.],
        [3., 1., 3., 4.],
        [4., 1., 5., 5.],
        [1., 7., 5., 5.],
        [1., 2., 5., 6.],
        [1., 6., 6., 6.],
        [1., 7., 7., 7.]])
torch.Size([8, 4])
tensor([2, 2, 2, 1, 1, 1, 0, 0])
torch.Size([8])


In [36]:
y_one_hot = torch.zeros(8, 3) # 결과값이 3가지로 이루어져있음
y_one_hot.scatter_(1, y_train.unsqueeze(1), 1)
print(y_one_hot)

tensor([[0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [1., 0., 0.],
        [1., 0., 0.]])


In [37]:
w = torch.zeros((4, 3), requires_grad=True) # 4: 입력데이터 갯수, 3: 선형회귀식의 갯수
b = torch.zeros((1, 3), requires_grad=True)

optimizer = optim.SGD([w, b], lr=0.1)

In [38]:
for epoch in range(1001):
    hat = F.softmax(x_train.matmul(w)+b, dim=1)
    cost = (y_one_hot * -torch.log(hat)).sum(dim=1).mean()

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if epoch % 100 == 0:
        print('epoch : {:4d}  cost : {} '.format(epoch, cost.item()) )

epoch :    0  cost : 1.0986123085021973 
epoch :  100  cost : 0.7041996121406555 
epoch :  200  cost : 0.6229995489120483 
epoch :  300  cost : 0.5657169222831726 
epoch :  400  cost : 0.5152913331985474 
epoch :  500  cost : 0.467661589384079 
epoch :  600  cost : 0.4212779700756073 
epoch :  700  cost : 0.37540146708488464 
epoch :  800  cost : 0.3297656178474426 
epoch :  900  cost : 0.2850724160671234 
epoch : 1000  cost : 0.24815461039543152 


In [40]:
pred = F.softmax(x_train.matmul(w)+b, dim=1)
print(pred)
pred.argmax(dim=1)

tensor([[1.1909e-04, 7.5725e-03, 9.9231e-01],
        [3.9348e-03, 1.3411e-01, 8.6196e-01],
        [6.9057e-06, 2.6787e-01, 7.3212e-01],
        [3.1690e-05, 7.8606e-01, 2.1390e-01],
        [3.2070e-01, 6.5953e-01, 1.9770e-02],
        [1.6596e-01, 8.3388e-01, 1.6129e-04],
        [6.3023e-01, 3.6949e-01, 2.7694e-04],
        [8.0781e-01, 1.9218e-01, 1.4840e-05]], grad_fn=<SoftmaxBackward0>)


tensor([2, 2, 2, 1, 1, 1, 0, 0])

In [41]:
model = nn.Linear(4,3)
optimizer = optim.SGD(model.parameters(), lr=0.1)

for epoch in range(1001):
    y_hat = model(x_train)
    cost = F.cross_entropy(y_hat, y_train)

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if epoch % 100 == 0:
        print("epoch : {} cost : {} ".format(epoch, cost.item()))

epoch : 0 cost : 1.8495128154754639 
epoch : 100 cost : 0.6898943781852722 
epoch : 200 cost : 0.6092584729194641 
epoch : 300 cost : 0.5512180924415588 
epoch : 400 cost : 0.5001412034034729 
epoch : 500 cost : 0.45194703340530396 
epoch : 600 cost : 0.40505102276802063 
epoch : 700 cost : 0.35873350501060486 
epoch : 800 cost : 0.3129115402698517 
epoch : 900 cost : 0.26952168345451355 
epoch : 1000 cost : 0.2419215589761734 


In [42]:
model(x_train).argmax(dim=1)

tensor([2, 2, 2, 1, 1, 1, 0, 0])

In [44]:
class SoftmaxClassModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4,3)

    def forward(self, x):
        return self.linear(x)

In [45]:
model = SoftmaxClassModel()
optimizer = optim.SGD(model.parameters(), lr=0.1)

for epch in range(1001):
    y_hat = model(x_train)
    cost = F.cross_entropy(y_hat, y_train)

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if epoch % 100 == 0:
        print("Epoch : {} Cost: {}".format(epoch, cost.item()))

Epoch : 1000 Cost: 1.845719814300537
Epoch : 1000 Cost: 1.4605095386505127
Epoch : 1000 Cost: 1.2650318145751953
Epoch : 1000 Cost: 1.231668472290039
Epoch : 1000 Cost: 1.2149604558944702
Epoch : 1000 Cost: 1.1763356924057007
Epoch : 1000 Cost: 1.1060723066329956
Epoch : 1000 Cost: 1.1037873029708862
Epoch : 1000 Cost: 1.04230797290802
Epoch : 1000 Cost: 1.0560168027877808
Epoch : 1000 Cost: 0.9899396896362305
Epoch : 1000 Cost: 1.021715521812439
Epoch : 1000 Cost: 0.9499561786651611
Epoch : 1000 Cost: 0.994708240032196
Epoch : 1000 Cost: 0.91837078332901
Epoch : 1000 Cost: 0.971838116645813
Epoch : 1000 Cost: 0.8928501009941101
Epoch : 1000 Cost: 0.95176100730896
Epoch : 1000 Cost: 0.8717033863067627
Epoch : 1000 Cost: 0.933782696723938
Epoch : 1000 Cost: 0.8537725806236267
Epoch : 1000 Cost: 0.9175072312355042
Epoch : 1000 Cost: 0.8382664322853088
Epoch : 1000 Cost: 0.902678370475769
Epoch : 1000 Cost: 0.8246381282806396
Epoch : 1000 Cost: 0.8891116380691528
Epoch : 1000 Cost: 0.8125

In [46]:
model(x_train).argmax(dim=1)

tensor([2, 2, 2, 1, 1, 1, 0, 0])