In [65]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import os
import torch.nn.functional as F

## Resnet 18

In [66]:
class BasicBlock(nn.Module):
  def __init__(self, in_planes, planes, stride=1):
    super(BasicBlock, self).__init__()

    self.conv1 = nn.Conv2d(in_planes, planes, 3, stride, 1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    
    self.conv2 = nn.Conv2d(planes, planes, 3, 1, 1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)

    self.shortcut = nn.Sequential()

    if stride != 1:
      self.shortcut = nn.Sequential(
          nn.Conv2d(in_planes, planes, 1, stride, bias=False),
          nn.BatchNorm2d(planes),
      )

  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.bn2(self.conv2(out))
    out += self.shortcut(x)
    out = F.relu(out)
    return out


class ResNet(nn.Module):
  def __init__(self, block, num_blocks, num_classes=10):
    super(ResNet, self).__init__()
    
    self.in_planes = 64
    self.conv1 = nn.Conv2d(3, 64, 3, 1, 1, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.layer1 = self._make_layer(block, 64, num_blocks[0], 1)
    self.layer2 = self._make_layer(block, 128, num_blocks[1], 2)
    self.layer3 = self._make_layer(block, 256, num_blocks[2], 2)
    self.layer4 = self._make_layer(block, 512, num_blocks[3], 2)
    self.adapPool = nn.AdaptiveAvgPool2d(1)
    self.linear = nn.Linear(512, num_classes)

  def _make_layer(self, block, planes, num_blocks, stride):
    strides = [stride] + [1] * (num_blocks - 1)  # 첫 블럭의 stride만 2 혹은 1 나머지 반복은 1
    layers = []
    for stride in strides:
      layers.append(block(self.in_planes, planes, stride))
      self.in_planes = planes
    return nn.Sequential(*layers)

  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = self.layer4(out)
    out = self.adapPool(out)
    out = out.view(x.shape[0], -1)
    return self.linear(out)
    

def ResNet18():
    return ResNet(BasicBlock, [2, 2, 2, 2])

In [67]:
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader

train_transform = transforms.Compose(
    [
     transforms.RandomCrop(32, padding=4),
     transforms.RandomHorizontalFlip(),
     transforms.ToTensor(),
    ]
)

test_transform = transforms.Compose(
    [
     transforms.ToTensor(),
    ]
)

train_dataset = CIFAR10(root='./data', train=True, download=True, transform=train_transform)
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=test_transform)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=True, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


## Define mixup, label smoothing

In [68]:
import numpy as np
import pdb

mixup_alpha = 1.0

def mixup_data(x, y):
  lam = np.random.rand()  # 0~1
  batch_size = x.shape[0]
  index = torch.randperm(batch_size).to(device)  # randperm: batch_size 개수만큼 unique random index를 가진 1차원 tensor
  mixed_x = lam * x + (1-lam) * x[index]
  y_a, y_b = y, y[index]
  return mixed_x, y_a, y_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
  return lam * criterion(pred, y_a) + (1-lam) * criterion(pred, y_b)

class LabelSmoothingCrossEntropy(nn.Module):
  def __init__(self):
    super(LabelSmoothingCrossEntropy, self).__init__()

  def forward(self, y, targets, smoothing=0.1):
    confidence = 1. - smoothing
    log_probs = F.log_softmax(y, dim=-1)  # 0에 가까워야 좋음 (loss down)
    true_probs = torch.zeros_like(log_probs)
    true_probs.fill_(smoothing / (y.shape[1] - 1))  # 일단 10개의 라벨 전부 0.1111 로 채움
    true_probs.scatter_(1, targets.data.unsqueeze(1), confidence)  # 각 배치별로 정답의 인덱스값을 0.1111 에서 0.9로 채워줌  # torch.scatter_(dim, index, src)
    return torch.mean(torch.sum(true_probs * -log_probs, dim=-1))

In [69]:
device = 'cuda'

net = ResNet18()
net = net.to(device)

learning_rate = 0.1
file_name = 'resnet18_cifar10.pth'

criterion = LabelSmoothingCrossEntropy()
optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.0002)


def train(epoch):
    print('\n[ Train epoch: %d ]' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0

    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        inputs, targets_a, targets_b, lam = mixup_data(inputs, targets)
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
        loss.backward()

        optimizer.step()
        train_loss += loss.item()
        _, predicted = outputs.max(1)

        total += targets.size(0)
        current_correct = (lam * predicted.eq(targets_a).sum().item() + (1 - lam) * predicted.eq(targets_b).sum().item())
        correct += current_correct

        if batch_idx % 100 == 0:
            print('\nCurrent batch:', str(batch_idx))
            print('Current batch average train accuracy:', current_correct / targets.size(0))
            print('Current batch average train loss:', loss.item() / targets.size(0))

    print('\nTotal average train accuarcy:', correct / total)
    print('Total average train loss:', train_loss / total)


def test(epoch):
    print('\n[ Test epoch: %d ]' % epoch)
    net.eval()
    loss = 0
    correct = 0
    total = 0

    for batch_idx, (inputs, targets) in enumerate(test_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        total += targets.size(0)

        outputs = net(inputs)
        loss += criterion(outputs, targets).item()

        _, predicted = outputs.max(1)
        correct += predicted.eq(targets).sum().item()

    print('\nTotal average test accuarcy:', correct / total)
    print('Total average test loss:', loss / total)

    state = {
        'net': net.state_dict()
    }
    if not os.path.isdir('checkpoint'):
        os.mkdir('checkpoint')
    torch.save(state, './checkpoint/' + file_name)
    print('Model Saved!')

In [None]:
import time


def adjust_learning_rate(optimizer, epoch):
    lr = learning_rate
    if epoch >= 50:
        lr /= 10
    if epoch >= 100:
        lr /= 10
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

start_time = time.time()

for epoch in range(0, 150):
    adjust_learning_rate(optimizer, epoch)
    train(epoch)
    test(epoch)
    print('\nTime elapsed:', time.time() - start_time)


[ Train epoch: 0 ]

Current batch: 0
Current batch average train accuracy: 0.08477877211326222
Current batch average train loss: 0.01844143308699131

Current batch: 100
Current batch average train accuracy: 0.17244588517472237
Current batch average train loss: 0.01778549887239933

Current batch: 200
Current batch average train accuracy: 0.1378255725844678
Current batch average train loss: 0.01752232387661934

Current batch: 300
Current batch average train accuracy: 0.230640408061667
Current batch average train loss: 0.016410542652010918

Total average train accuarcy: 0.1798408060593202
Total average train loss: 0.018602374048233034

[ Test epoch: 0 ]

Total average test accuarcy: 0.3042
Total average test loss: 0.015726638066768647
Model Saved!

Time elapsed: 67.60855865478516

[ Train epoch: 1 ]

Current batch: 0
Current batch average train accuracy: 0.22014992677417644
Current batch average train loss: 0.01708904094994068

Current batch: 100
Current batch average train accuracy: 0.2

## 95% accuracy in CIFAR10 dataset with label smoothing and mixup augmentation