In [9]:
from PIL import Image
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import sys
import torch
import numpy as np

from torchvision import transforms, datasets
import torchvision.transforms.functional as F

import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
device = torch.device('mps:0' if torch.backends.mps.is_available() else 'cpu')

In [3]:
# weak augmentation
transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomCrop(32, fill=128),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

In [5]:
# Load the data and transform the dataset
train_dataset =  datasets.CIFAR10(root='./data', train=True, download=True, transform = transform)
validation_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform = transform)

# Create train and validation batch for training
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=100)
validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=100)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
train_dataset[8]

(tensor([[[-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          ...,
          [-2.1179, -2.1179, -1.6042,  ..., -0.9363, -0.9877, -0.4054],
          [-2.1179, -2.1179, -1.6213,  ..., -0.9877, -0.8849, -0.3883],
          [-2.1179, -2.1179, -1.6555,  ..., -0.7822, -0.2684,  0.1083]],
 
         [[-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          ...,
          [-2.0357, -2.0357, -0.3025,  ..., -0.4776, -0.3025,  0.6254],
          [-2.0357, -2.0357, -0.3375,  ..., -0.4426, -0.2850,  0.5728],
          [-2.0357, -2.0357, -0.3901,  ..., -0.4076, -0.0924,  0.9405]],
 
         [[-1.8044, -1.8044, -1.8044,  ..., -1.8044, -1.8044, -1.8044],
          [-1.8044, -1.8044,

In [123]:
class BasicBlock(nn.Module):
    expansion = 1 #(Q)너는 왜 1이야? (A)BottleNeck을 쓰는 이유를 생각해보면 됨!
    
    def __init__(self, in_planes, planes, stride=1):
        """
        in_plane : in_channel 개수
        planes : out_channel 개수
        """
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, 
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        
        # 논문 3.3에서 Residual Network부분 설명
        # shortcut connection을 위해서 input과 output dimension이 동일해야함.
        # dimentsion이 증가했을 때 (planes = 64에서 planes= 128로 증가할 때), zero padding을 해주거나 projection shortcut을 해줌.
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

In [124]:
class Bottleneck(nn.Module):
    expansion = 4 #(Q)너는 왜 4이야? (A)256 사이즈 그대로 보면 연산량이 너무 크니까 원하는만큼 채널을 줄여서 보자!
    
    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        
        # (1*1) conv 채널 축소 : padding, stride 노상관
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes*self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes*self.expansion) #(256)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

In [125]:
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes):
        super(ResNet, self).__init__()
        self.in_planes = 64
        
        # 처음에 들어오는 이미지의 채널은 3
        # conv1 (7*7, 64, stride=2) or (3*3, 64, stride=2) * N
        # stride = 2로 downsampling
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, 
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        # layer2-4는 downsampling을 위해 stride = 2
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes) 
        
    def _make_layer(self, block, planes, num_blocks, stride):
        """
        block : block의 종류
        planes : out_channel 수
        num_blocks : block의 개수
        """
        # (Q)첫 레이어만 stride받은 대로 하고 나머지 레이어의 stride는 1로 고정? 
        # (A)논문에 보면 stride = 2를 써서 downsampling해주는 부분이 있음.
        strides = [stride] + [1]*(num_blocks-1) 
        layers = []
        # (Q)왜 stride를 써서 했지? num_block을 쓰는 게 아니라..
        # (A)stride가 블록 내에서 첫번째 레이어만 stide=2로 다운샘플링해주고 나머지는 stride=1을 사용
        for stride in strides:
            print("in_planes: ", self.in_planes, "planes: ", planes)
            layers.append(block(self.in_planes, planes, stride)) 
            # expansion이란? bottleneck의 차원축소때문에 사용하는 것
            self.in_planes = planes * block.expansion #planes에 맞춰서 in_planes를 바꿔주기
        return nn.Sequential(*layers) 
    
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1) #각 채널을 펼쳐서 linear에 넣어주기
        out = self.linear(out)
        return out

In [126]:
def ResNet34(num_classes):
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes)


def ResNet50(num_classes):
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes)

In [127]:
model = ResNet50(10)

in_planes:  64 planes:  64
in_planes:  256 planes:  64
in_planes:  256 planes:  64
in_planes:  256 planes:  128
in_planes:  512 planes:  128
in_planes:  512 planes:  128
in_planes:  512 planes:  128
in_planes:  512 planes:  256
in_planes:  1024 planes:  256
in_planes:  1024 planes:  256
in_planes:  1024 planes:  256
in_planes:  1024 planes:  256
in_planes:  1024 planes:  256
in_planes:  1024 planes:  512
in_planes:  2048 planes:  512
in_planes:  2048 planes:  512


In [145]:
model.to(device)
x = next(iter(train_loader))[0].to(device)

In [129]:
output = model(x)

In [130]:
output.size()

torch.Size([100, 10])