# Softmax function

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
# Toy training set: eight samples with four features each.
x_train = torch.tensor(
    [
        [1, 2, 1, 1],
        [2, 3, 4, 2],
        [3, 1, 3, 4],
        [4, 1, 5, 5],
        [1, 7, 5, 5],
        [1, 2, 5, 6],
        [1, 6, 6, 6],
        [1, 7, 7, 7],
    ],
    dtype=torch.float32,
)

# Class index (0, 1 or 2) for every sample.
# Why a LongTensor (int64) and not an IntTensor? PyTorch's classification
# losses expect class-index targets in that dtype.
y_train = torch.tensor([2, 2, 2, 1, 1, 1, 0, 0], dtype=torch.int64)

# One-hot encoding of the same labels (one row per sample, one column per class).
y_train2 = torch.tensor(
    [
        [0, 0, 1],
        [0, 0, 1],
        [0, 0, 1],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [1, 0, 0],
        [1, 0, 0],
    ],
    dtype=torch.float32,
)

In [None]:
class MultiLayerPerceptron(nn.Module):
  """Two-layer perceptron mapping 4 input features to 3 class scores.

  The network is ``Linear(4, 3) -> Sigmoid -> Linear(3, 3)``. ``forward``
  returns the output of the last linear layer unchanged (raw logits); no
  softmax is applied inside the module.
  """

  def __init__(self):
    super().__init__()
    # Hidden layer: 4 input features -> 3 units, squashed by a sigmoid.
    self.linear1 = nn.Linear(in_features=4, out_features=3)
    self.activation = nn.Sigmoid()

    # Output layer: 3 hidden units -> 3 logits (one per class).
    self.linear2 = nn.Linear(in_features=3, out_features=3)

  def forward(self, x):
    """Return the unnormalized class scores (logits) for ``x``."""
    hidden = self.activation(self.linear1(x))
    return self.linear2(hidden)

In [None]:
# Build the network and switch it to training mode.
model = MultiLayerPerceptron()
model.train()

# Plain SGD over every parameter of the model, with a learning rate of 1.
optimizer = optim.SGD(params=model.parameters(), lr=1)

# Q&A note (kept verbatim below, in Korean). Summary:
#   Q) Why is the learning rate 1 in this softmax exercise? Isn't the step so
#      large that gradient descent may overshoot the minimum?
#   A) Maybe, maybe not - the learning rate has to be found experimentally.
#      A value of 1 is occasionally used as a starting point; tune it if the
#      results are poor.
'''
Q)
Softmax 실습에서 optimizer의 learning rate가 1인 이유는 무엇인가요?
Gradient descent시 step의 크기가 커서
minimum에 대해 over step이 생길 가능성이 있지 않나요?

A)
그럴수도 있고 아닐수도 있습니다 실험을 해봐야 알 수 있는 파라미터가 learning rate입니다.
일단 1로 설정하는 경우가 가끔 있긴합니다 만약 성능이 별로라고 생각하시면 조정하시면 됩니다.

'''

'\nQ)\nSoftmax 실습에서 optimizer의 learning rate가 1인 이유는 무엇인가요?\nGradient descent시 step의 크기가 커서\nminimum에 대해 over step이 생길 가능성이 있지 않나요?\n\nA)\n그럴수도 있고 아닐수도 있습니다 실험을 해봐야 알 수 있는 파라미터가 learning rate입니다.\n일단 1로 설정하는 경우가 가끔 있긴합니다 만약 성능이 별로라고 생각하시면 조정하시면 됩니다.\n\n'

In [None]:
# Full-batch training of the toy classifier, with a progress report every
# 100 epochs (epoch 0 is skipped).
epochs = 8000

# The loss module holds no per-step state, so build it once instead of on
# every iteration.
loss_fn = nn.CrossEntropyLoss()

model.train()

for epoch in range(epochs):
  logits = model(x_train)

  # nn.CrossEntropyLoss applies log-softmax internally, so it is fed the raw
  # logits; applying softmax beforehand would apply it twice.
  # Equivalent two-step form:
  #   log_probs = nn.LogSoftmax(dim=1)(logits)
  #   cost = nn.NLLLoss()(log_probs, y_train)
  # Background reading: https://nuguziii.github.io/dev/dev-002/
  cost = loss_fn(logits, y_train) # get cost

  optimizer.zero_grad()
  cost.backward() # backward propagation
  optimizer.step() # update parameters

  if epoch != 0 and epoch % 100 == 0:
    model.eval()
    with torch.no_grad():
      # Call the module itself (not .forward) so registered hooks still run.
      logits = model(x_train)
      probs = nn.Softmax(dim=1)(logits)
      print('logit\n : {}'.format(logits))
      print('predict with softmax\n : {}'.format(probs))
      print('predict with argmax\n : {}'.format(torch.argmax(probs, dim=1)))
    # Restore training mode; without this every later optimisation step would
    # run with the model still in eval mode.
    model.train()


# MNIST Data Classifier

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim

In [3]:
import torchvision
import torchvision.transforms as transforms

In [4]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
# Fetch MNIST into "MNIST_data/" (downloaded on first use). The training split
# is built first, then the test split; every image is converted by ToTensor().
train_dataset, test_dataset = [
    torchvision.datasets.MNIST(
        root="MNIST_data/",
        train=is_train_split,
        transform=transforms.ToTensor(),
        download=True,
    )
    for is_train_split in (True, False)
]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw



In [6]:
batch_size = 128

# Training batches are reshuffled every epoch; test batches keep dataset order.
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_dataloader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [7]:
# Number of values in one flattened 28 x 28 image (784).
img_size = 28 * 28

In [8]:
class Model(nn.Module):
  """Fully connected classifier for flattened images.

  Layer widths come from the module-level ``img_size``:
  ``img_size -> 3 * img_size -> 2 * img_size -> 10``, with a sigmoid after
  each of the two hidden layers. ``forward`` returns the output of the last
  linear layer unchanged.
  """

  def __init__(self):
    super().__init__()
    first_width, second_width = img_size * 3, img_size * 2
    self.linear1 = nn.Linear(img_size, first_width)
    self.linear2 = nn.Linear(first_width, second_width)
    self.linear3 = nn.Linear(second_width, 10)

    self.activation = nn.Sigmoid()

  def forward(self, x):
    """Return the 10 unnormalized class scores (logits) for each row of ``x``."""
    hidden = x
    for layer in (self.linear1, self.linear2):
      hidden = self.activation(layer(hidden))

    # No activation on the output layer: the softmax is applied to these
    # logits outside the model.
    return self.linear3(hidden)

In [24]:
# Create the network, move its parameters to the selected device, and put it
# in training mode.
model = Model()
model.to(device)
model.train()

In [27]:
# Plain SGD over all model parameters with a learning rate of 0.1.
optimizer = optim.SGD(params=model.parameters(), lr=0.1)

In [28]:
# Cross-entropy over raw logits, averaged over the batch (the default reduction).
criterion = nn.CrossEntropyLoss(reduction='mean')

In [31]:
# Mini-batch training on MNIST; prints the mean per-batch loss of every epoch.
epochs = 50
model.train() # set to train mode

# The number of batches per epoch does not change, so compute it once.
total_batch_num = len(train_dataloader)

for epoch in range(epochs):
  entire_cost = 0.0

  for b_x, b_y in train_dataloader:
    # Flatten each image batch,
    # from torch.Size([128, 1, 28, 28])
    # to   torch.Size([128, 784])
    # using img_size, the same constant the evaluation cell uses.
    b_x = b_x.view(-1, img_size).to(device)
    logits = model(b_x)
    cost = criterion(logits, b_y.to(device))

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Accumulate a plain Python float. Adding the loss tensor itself would
    # keep autograd history for every batch alive until the epoch ends.
    entire_cost += cost.item() / total_batch_num

  print('Epoch : {} / {}, cost : {}'.format(epoch + 1, epochs, entire_cost))


Epoch : 1 / 50, cost : 0.18507246673107147
Epoch : 2 / 50, cost : 0.18330374360084534
Epoch : 3 / 50, cost : 0.1802685409784317
Epoch : 4 / 50, cost : 0.17819979786872864
Epoch : 5 / 50, cost : 0.17646300792694092
Epoch : 6 / 50, cost : 0.17364731431007385
Epoch : 7 / 50, cost : 0.17149098217487335
Epoch : 8 / 50, cost : 0.1692531704902649
Epoch : 9 / 50, cost : 0.16739380359649658
Epoch : 10 / 50, cost : 0.1653398871421814
Epoch : 11 / 50, cost : 0.16350148618221283
Epoch : 12 / 50, cost : 0.16114935278892517
Epoch : 13 / 50, cost : 0.15925654768943787
Epoch : 14 / 50, cost : 0.15702342987060547
Epoch : 15 / 50, cost : 0.15586328506469727
Epoch : 16 / 50, cost : 0.15408802032470703
Epoch : 17 / 50, cost : 0.15240541100502014
Epoch : 18 / 50, cost : 0.15014755725860596
Epoch : 19 / 50, cost : 0.14913758635520935
Epoch : 20 / 50, cost : 0.1467892974615097
Epoch : 21 / 50, cost : 0.14484073221683502
Epoch : 22 / 50, cost : 0.14325571060180664
Epoch : 23 / 50, cost : 0.1415022611618042
Ep

In [30]:
# Measure classification accuracy over the whole test loader.
model.eval() # set to inference(eval) mode

correct = 0
total = 0

# No gradients are needed while evaluating.
with torch.no_grad():
  for b_x, b_y in test_dataloader:
    b_x = b_x.view(-1, img_size).to(device)

    logits = model(b_x)
    probs = torch.softmax(logits, dim=1)
    predicts = probs.argmax(dim=1)

    total += len(b_y)
    # (predicts == labels) is a torch.Tensor, .sum() is still a torch.Tensor,
    # and .item() finally yields a Python int that can be added to the counter.
    correct += (predicts == b_y.to(device)).sum().item()

print(f'Accuracy: {100 * correct / total}%')





Accuracy: 94.12%
