In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# Random Seed 고정
torch.manual_seed(0)
torch.cuda.manual_seed(0)
torch.cuda.manual_seed_all(0)

In [3]:
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

In [4]:
# Torchvision 이용해서 data 불러오기
import torchvision
import torchvision.transforms as transforms

train_dataset = torchvision.datasets.CIFAR10(root="CIFAR10/",
                                             train=True,
                                             transform=transforms.ToTensor(),
                                             download=True)
test_dataset = torchvision.datasets.CIFAR10(root="CIFAR10/",
                                            train=False,
                                            transform=transforms.ToTensor(),
                                            download=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to CIFAR10/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting CIFAR10/cifar-10-python.tar.gz to CIFAR10/
Files already downloaded and verified


In [5]:
batch_size = 128

train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)

In [6]:
class Model(nn.Module):
  def __init__(self):
    super(Model, self).__init__()
    self.linear1 = nn.Linear(32*32*3, 256) # input size : 32*32*3 (RGB)
    self.linear2 = nn.Linear(256, 128)
    self.linear3 = nn.Linear(128, 10)      # output size : 10

    self.activation = nn.Sigmoid()
  
  def forward(self, x):
    z1 = self.linear1(x)
    a1 = self.activation(z1)

    z2 = self.linear2(a1)
    a2 = self.activation(z2)
    
    z3 = self.linear3(a2)

    return z3

In [7]:
model = Model().to(device).train()

In [11]:
optimizer = optim.SGD(model.parameters(), lr=1) 

In [12]:
criterion = nn.CrossEntropyLoss()

In [None]:
epochs = 70

train_avg_costs = []
test_avg_costs = []

test_total_batch = len(test_dataloader)
total_batch_num = len(train_dataloader)

for epoch in range(epochs):
  avg_cost = 0
  model.train()

  for b_x, b_y in train_dataloader:
    b_x = b_x.view(-1, 32*32*3).to(device)
    logits = model(b_x) # forward prop
    loss = criterion(logits, b_y.to(device)) # get cost

    optimizer.zero_grad()
    loss.backward() # backward prop
    optimizer.step() # update parameters

    avg_cost += loss / total_batch_num # 모든 데이터셋에 대한 cost 값

  train_avg_costs.append(avg_cost.detach()) # ???
  print('Epoch : {} / {}, cost : {}'.format(epoch+1, epochs, avg_cost))

  test_avg_cost = 0
  model.eval()
  for b_x, b_y in test_dataloader:
    b_x = b_x.view(-1, 32*32*3).to(device)
    with torch.no_grad():
      logits = model(b_x)
      test_loss = criterion(logits, b_y.to(device))
    test_avg_cost += test_loss / test_total_batch
  
  test_avg_costs.append(test_avg_cost.detach())

Epoch : 1 / 70, cost : 2.1660451889038086
Epoch : 2 / 70, cost : 1.9527112245559692
Epoch : 3 / 70, cost : 1.8455394506454468
Epoch : 4 / 70, cost : 1.768599033355713
Epoch : 5 / 70, cost : 1.7144073247909546
Epoch : 6 / 70, cost : 1.6716103553771973
Epoch : 7 / 70, cost : 1.6332426071166992
Epoch : 8 / 70, cost : 1.5961003303527832
Epoch : 9 / 70, cost : 1.5622532367706299
Epoch : 10 / 70, cost : 1.5304527282714844
Epoch : 11 / 70, cost : 1.5031541585922241
Epoch : 12 / 70, cost : 1.480806827545166
Epoch : 13 / 70, cost : 1.4570984840393066
Epoch : 14 / 70, cost : 1.4342296123504639
Epoch : 15 / 70, cost : 1.4150397777557373
Epoch : 16 / 70, cost : 1.3987442255020142
Epoch : 17 / 70, cost : 1.3775017261505127
Epoch : 18 / 70, cost : 1.3569788932800293
Epoch : 19 / 70, cost : 1.3391313552856445
Epoch : 20 / 70, cost : 1.3223328590393066
Epoch : 21 / 70, cost : 1.3057750463485718
Epoch : 22 / 70, cost : 1.2892451286315918
Epoch : 23 / 70, cost : 1.2777546644210815
Epoch : 24 / 70, cost 

In [None]:
# 학습이 overfitting인지 확인
import matplotlib.pyplot as plt
import numpy as np

epoch = range(epoch)
plt.plot(epoch, train_avg_costs, 'r-')
plt.plot(epoch, test_avg_costs, 'b-')
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend(['train', 'test'])
plt.show()

In [13]:
# L2 Regularization

epochs = 70
lmbd = 0.003

train_avg_costs = []
test_avg_costs = []

test_total_batch = len(test_dataloader)
total_batch_num = len(train_dataloader)

for epoch in range(epochs):
  avg_cost = 0
  model.train()

  for b_x, b_y in train_dataloader:
    b_x = b_x.view(-1, 32*32*3).to(device)
    logits = model(b_x) # forward prop
    loss = criterion(logits, b_y.to(device)) # get cost

    # 바뀌는 부분
    reg = model.linear1.weight.pow(2.0).sum()
    reg += model.linear2.weight.pow(2.0).sum()
    reg += model.linear3.weight.pow(2.0).sum()

    loss += lmbd*reg/len(b_x)/len(b_x)/2.

    optimizer.zero_grad()
    loss.backward() # backward prop
    optimizer.step() # update parameters

    avg_cost += loss / total_batch_num # 모든 데이터셋에 대한 cost 값

  train_avg_costs.append(avg_cost.detach()) # ???
  print('Epoch : {} / {}, cost : {}'.format(epoch+1, epochs, avg_cost))

  test_avg_cost = 0
  model.eval()
  for b_x, b_y in test_dataloader:
    b_x = b_x.view(-1, 32*32*3).to(device)
    with torch.no_grad():
      logits = model(b_x)
      test_loss = criterion(logits, b_y.to(device))
    test_avg_cost += test_loss / test_total_batch
  
  test_avg_costs.append(test_avg_cost.detach())

Epoch : 1 / 70, cost : 2.1660666465759277
Epoch : 2 / 70, cost : 1.9527500867843628


KeyboardInterrupt: ignored

In [None]:
# Dropout

class Model(nn.Module):
  def __init__(self, drop_prob):
    super(Model, self).__init__()
    self.linear1 = nn.Linear(32*32*3, 256) # input size : 32*32*3 (RGB)
    self.linear2 = nn.Linear(256, 128)
    self.linear3 = nn.Linear(128, 10)      # output size : 10

    self.dropout = nn.Dropout(drop_prob)   # 추가
    self.activation = nn.Sigmoid()
  
  def forward(self, x):
    z1 = self.linear1(x)
    a1 = self.activation(z1)
    a1 = self.dropout(a1)     # 추가

    z2 = self.linear2(a1)
    a2 = self.activation(z2)
    a2 = self.dropout(a2)     # 추가
    
    z3 = self.linear3(a2)

    return z3

In [10]:
model = Model(0.1).to(device).train()

TypeError: ignored

In [None]:
# Normalization
import torchvision
import torchvision.transforms as transforms

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))]
)

train_dataset = torchvision.datasets.CIFAR10(root="CIFAR10/",
                                             train=True,
                                             transform=transforms.ToTensor(),
                                             download=True)
test_dataset = torchvision.datasets.CIFAR10(root="CIFAR10/",
                                            train=False,
                                            transform=transforms.ToTensor(),
                                            download=True)

*CIFAR-10*에 L2 Regularization, Dropout, Normalization 모두 적용 후 **test accuracy** 확인

예상과 다른 결과가 나올 시 **hyperparameter**(*epoch, learning rate, ...*) 조정