In [2]:
import torch
import torch.nn as nn
import torch.optim as optim

In [4]:
# Toy classification data: 8 samples with 4 features each, labelled 0, 1 or 2.
# Note: the targets are class indices and must be an integer (int64 / Long) tensor.
x_train = torch.tensor(
  [
    [1, 2, 1, 1],
    [2, 1, 3, 2],
    [3, 1, 3, 4],
    [4, 1, 5, 5],
    [1, 7, 5, 5],
    [1, 2, 5, 6],
    [1, 6, 6, 6],
    [1, 7, 7, 7],
  ],
  dtype=torch.float32,
)
y_train = torch.tensor([2, 2, 2, 1, 1, 1, 0, 0], dtype=torch.int64)

In [7]:
class MultiLayerPerceptron(nn.Module):
  """Small perceptron: Linear(4 -> 3), sigmoid, Linear(3 -> 3) producing raw logits."""

  def __init__(self):
    super().__init__()
    # First layer: in_features (4) has to match the size of each input sample.
    self.linear1 = nn.Linear(in_features=4, out_features=3)
    self.activation = nn.Sigmoid()

    # Last layer: out_features (3) has to match the number of output classes.
    self.linear2 = nn.Linear(in_features=3, out_features=3)

  def forward(self, x):
    """Return the output of the last linear layer (no softmax applied) for `x`.

    `x` must have a last dimension of 4, as required by `linear1`.
    """
    hidden = self.activation(self.linear1(x))
    return self.linear2(hidden)

In [8]:
# Build the toy classifier, then switch it to training mode.
model = MultiLayerPerceptron()
model.train()

In [14]:
# Stochastic gradient descent over all model parameters, learning rate 0.1.
optimizer = optim.SGD(params=model.parameters(), lr=0.1)

In [15]:
epochs = 8000

# Build the loss module once; the original re-created it on every iteration
# even though it does not depend on the iteration.
cost_fn = nn.CrossEntropyLoss()

model.train()
for epoch in range(epochs):
  # Forward propagation. Logits are the inputs to the final activation function.
  logits = model(x_train)

  # CrossEntropyLoss already includes the softmax, i.e. this is the same as:
  #   log_probs = nn.LogSoftmax(dim=1)(logits)
  #   cost = nn.NLLLoss()(log_probs, y_train)
  cost = cost_fn(logits, y_train)

  optimizer.zero_grad()
  cost.backward()   # backward propagation
  optimizer.step()  # update parameters

In [16]:
# Evaluate on the training inputs; gradients are not tracked for the forward pass.
model.eval()
with torch.no_grad():
  logits = model(x_train)
probs = nn.Softmax(dim=1)(logits)

# One print call, one line group per report. argmax over dim=1 extracts, for
# each sample, the index with the highest probability.
print(
  'logit\n : {}'.format(logits),
  'predict with softmax\n : {}'.format(probs),
  'predict with argmax\n : {}'.format(torch.argmax(probs, dim=1)),
  sep='\n',
)

logit
 : tensor([[ -7.0982,  -1.7816,   7.4594],
        [-12.1272,   1.7027,   9.1273],
        [-11.9471,   1.6681,   8.9964],
        [ -4.0371,   5.3971,  -1.6738],
        [ -2.4762,   4.3466,  -2.6766],
        [ -1.8245,   5.1284,  -3.4385],
        [  6.9460,   0.4871,  -7.6474],
        [  7.0659,   0.4359,  -7.7097]])
predict with softmax
 : tensor([[4.7608e-07, 9.6974e-05, 9.9990e-01],
        [5.8755e-10, 5.9603e-04, 9.9940e-01],
        [8.0179e-10, 6.5625e-04, 9.9934e-01],
        [7.9869e-05, 9.9907e-01, 8.4862e-04],
        [1.0865e-03, 9.9802e-01, 8.8923e-04],
        [9.5480e-04, 9.9886e-01, 1.9008e-04],
        [9.9844e-01, 1.5641e-03, 4.5864e-07],
        [9.9868e-01, 1.3184e-03, 3.8235e-07]])
predict with argmax
 : tensor([2, 2, 2, 1, 1, 1, 0, 0])


In [20]:
# Pick the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [19]:
import torchvision
import torchvision.transforms as transforms

# MNIST train and test splits stored under MNIST_data/ (download requested);
# ToTensor() is applied to every image.
train_dataset = torchvision.datasets.MNIST(
  root="MNIST_data/",
  train=True,
  transform=transforms.ToTensor(),
  download=True,
)
test_dataset = torchvision.datasets.MNIST(
  root="MNIST_data/",
  train=False,
  transform=transforms.ToTensor(),
  download=True,
)

In [21]:
batch_size = 128

# Mini-batch iterators over the two MNIST splits.
# NOTE(review): no `shuffle` argument is passed, so the training loader walks
# the dataset in its stored order every epoch — consider shuffle=True for SGD.
train_dataloader = torch.utils.data.DataLoader(
  dataset=train_dataset,
  batch_size=batch_size,
)
test_dataloader = torch.utils.data.DataLoader(
  dataset=test_dataset,
  batch_size=batch_size,
)

In [34]:
class Model(nn.Module):
  """Fully connected classifier: 784 -> 2352 -> 1568 -> 10, sigmoid after the first two layers."""

  def __init__(self):
    super().__init__()
    n_pixels = 28 * 28  # input size: a flattened 28x28 image = 784 values
    self.linear1 = nn.Linear(n_pixels, n_pixels * 3)
    self.linear2 = nn.Linear(n_pixels * 3, n_pixels * 2)
    self.linear3 = nn.Linear(n_pixels * 2, 10)  # output size: 10 (digits 0-9)

    self.activation = nn.Sigmoid()

  def forward(self, x):
    """Return the 10 raw outputs of the last linear layer for `x`.

    `x` must have a last dimension of 784, as required by `linear1`.
    """
    hidden = self.activation(self.linear1(x))
    hidden = self.activation(self.linear2(hidden))
    return self.linear3(hidden)

In [35]:
# Instantiate the MNIST classifier, move it to the selected device and put it
# in training mode.
model = Model()
model = model.to(device)
model.train()

In [36]:
# Stochastic gradient descent over all model parameters, learning rate 0.1.
optimizer = optim.SGD(params=model.parameters(), lr=0.1)

In [37]:
# Cross-entropy on raw logits, averaged over the batch (the default reduction,
# spelled out here).
criterion = nn.CrossEntropyLoss(reduction='mean')

In [38]:
epochs = 15
# Number of mini-batches per epoch; it does not change between epochs, so it
# is computed once.
total_batch_num = len(train_dataloader)

model.train()
for epoch in range(epochs):
  avg_cost = 0.0

  for b_x, b_y in train_dataloader:
    # Flatten every 28x28 image into a 784-element row before the first layer.
    b_x = b_x.view(-1, 28*28).to(device)
    logits = model(b_x)  # forward prop
    loss = criterion(logits, b_y.to(device))  # cost of the current mini-batch

    optimizer.zero_grad()
    loss.backward()   # backward prop
    optimizer.step()  # update parameters

    # Running mean of the batch losses over the whole dataset. Accumulate a
    # plain Python float: adding the loss tensor itself would turn avg_cost
    # into a tensor that carries autograd history for every batch.
    avg_cost += loss.item() / total_batch_num

  print('Epoch : {} / {}, cost : {}'.format(epoch+1, epochs, avg_cost))

Epoch : 1 / 15, cost : 2.3282883167266846
Epoch : 2 / 15, cost : 1.4339090585708618
Epoch : 3 / 15, cost : 0.7408351302146912
Epoch : 4 / 15, cost : 0.5390636324882507
Epoch : 5 / 15, cost : 0.4572492241859436
Epoch : 6 / 15, cost : 0.41629156470298767
Epoch : 7 / 15, cost : 0.39195871353149414
Epoch : 8 / 15, cost : 0.37499579787254333
Epoch : 9 / 15, cost : 0.3617182970046997
Epoch : 10 / 15, cost : 0.35055118799209595
Epoch : 11 / 15, cost : 0.3407725691795349
Epoch : 12 / 15, cost : 0.3320487439632416
Epoch : 13 / 15, cost : 0.3242090344429016
Epoch : 14 / 15, cost : 0.3171291649341583
Epoch : 15 / 15, cost : 0.31068915128707886


In [41]:
# Accuracy check: evaluate how well the model classifies the test data.
correct = 0
total = 0

model.eval()
with torch.no_grad():  # inference only, so no gradients are tracked
  for b_x, b_y in test_dataloader:
    b_x = b_x.view(-1, 784).to(device)
    logits = model(b_x)

    # The predicted class is the arg-max of the logits. The original also
    # computed a softmax here on every batch but never used the result.
    predicts = torch.argmax(logits, dim=1)

    total += len(b_y)  # number of samples in the current step
    # Count how many predicted labels equal the true labels.
    correct += (predicts == b_y.to(device)).sum().item()

# Integer division: the reported percentage is floored to a whole number.
print(f'Accuracy of the network on test images: {100 * correct // total} %')

Accuracy of the network on test images: 90 %
