# PAPER REVIEW AND IMPLEMENTATION

## VGGnet

In [16]:
import torch 
from torch import nn
import torchvision 
import torchvision.transforms as transforms
from torchinfo import summary

from tqdm import trange

import matplotlib.pyplot as plt
import numpy as np

### Data

In [39]:
# Preprocessing applied to every image:
#   1. ToTensor  - converts the HxWxC image into a CxHxW float tensor
#                  (pixel values 0~255 are scaled to 0~1).
#   2. Normalize - (x - mean) / std per channel; with mean = std = 0.5 on all
#                  three channels the values end up in the range -1~1.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR-10 training split (downloaded into ./data if missing) and its loader.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform
)
trainloader = torch.utils.data.DataLoader(
    dataset=trainset, batch_size=4, shuffle=True, num_workers=0
)

# CIFAR-10 test split; iterated in a fixed order (no shuffling).
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform
)
testloader = torch.utils.data.DataLoader(
    dataset=testset, batch_size=4, shuffle=False, num_workers=0
)

# Human-readable class names; the position in the tuple is the label index.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)


Files already downloaded and verified
Files already downloaded and verified


### Parameter

In [10]:
# Layer layouts consumed by VGG.make_layers, keyed by configuration letter.
# An integer is the output-channel count of a 3x3 convolution; "M" marks a
# 2x2 max-pool. Each row below is one convolution stage ending in a pool.
# A / B / D / E contain 8 / 10 / 13 / 16 convolution layers respectively.
cfgs = {
    "A": [
        64, "M",
        128, "M",
        256, 256, "M",
        512, 512, "M",
        512, 512, "M",
    ],
    "B": [
        64, 64, "M",
        128, 128, "M",
        256, 256, "M",
        512, 512, "M",
        512, 512, "M",
    ],
    "D": [
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, "M",
        512, 512, 512, "M",
        512, 512, 512, "M",
    ],
    "E": [
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, 256, "M",
        512, 512, 512, 512, "M",
        512, 512, 512, 512, "M",
    ],
}

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

### Model

In [4]:
class VGG(nn.Module):
    """VGG-style convolutional network.

    The network consists of a convolutional feature extractor described by
    ``cfg``, an adaptive average pool that always yields a 7x7 feature map
    (regardless of the input resolution), and a three-layer fully connected
    classifier with dropout.

    Args:
        cfg: Layer specification. Every ``int`` adds a 3x3 convolution
            (padding 1) with that many output channels followed by ReLU; any
            other entry (``"M"`` by convention) adds a 2x2 max-pool, stride 2.
        batch_norm: If true, a ``BatchNorm2d`` is inserted between every
            convolution and its ReLU.
        num_class: Number of output logits.
        init_weights: If true, convolutions get Kaiming-normal weights and
            linear layers get N(0, 0.01) weights; all biases are set to zero.
        drop_p: Dropout probability used in the classifier.
    """

    def __init__(self, cfg, batch_norm, num_class=1000, init_weights = True, drop_p = 0.5):
        super().__init__()

        self.features = self.make_layers(cfg, batch_norm)
        # Adaptive pooling fixes the spatial size at 7x7 for any input size.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

        # Width of the last convolution decides the classifier's input size.
        # This is 512 for every standard configuration, but deriving it keeps
        # narrower custom configurations usable. With no convolution at all
        # the features still carry the 3 input channels.
        conv_widths = [v for v in cfg if isinstance(v, int)]
        feature_channels = conv_widths[-1] if conv_widths else 3

        self.classifier = nn.Sequential(
            nn.Linear(feature_channels * 7 * 7, 4096),
            nn.ReLU(),
            nn.Dropout(drop_p),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(drop_p),
            nn.Linear(4096, num_class)
        )

        if init_weights:
            self._initialize_weights()

    def _initialize_weights(self):
        """Apply the custom initialisation to every Conv2d and Linear layer."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            # This branch must be a sibling of the Conv2d check. Nested under
            # the bias test (as it was before) it could never match a Linear
            # module, so the classifier silently kept PyTorch's default init.
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return class logits of shape (batch, num_class) for an image batch."""
        x = self.features(x)      # convolutional feature maps
        x = self.avgpool(x)       # force a 7x7 spatial size
        x = torch.flatten(x, 1)   # keep the batch dim, flatten the rest
        x = self.classifier(x)    # fully connected head
        return x

    def make_layers(self, cfg, batch_norm = False):
        """Build the convolutional feature extractor described by ``cfg``.

        Args:
            cfg: Sequence where an ``int`` means "3x3 conv with this many
                output channels" and anything else means "2x2 max-pool".
            batch_norm: Whether to add ``BatchNorm2d`` after each convolution.

        Returns:
            An ``nn.Sequential`` expecting a 3-channel input.
        """
        layers = []
        in_channels = 3
        for v in cfg:
            if isinstance(v, int):
                layers.append(nn.Conv2d(in_channels, v, 3, padding=1))
                if batch_norm:
                    layers.append(nn.BatchNorm2d(v))
                layers.append(nn.ReLU())
                in_channels = v
            else:
                layers.append(nn.MaxPool2d(2, 2))
        return nn.Sequential(*layers)

In [5]:
# Instantiate configuration "E" without batch normalisation, then show a
# per-layer summary for a batch of two 3x224x224 inputs evaluated on the CPU.
model = VGG(cfg=cfgs["E"], batch_norm=False)

summary(model, input_size=(2, 3, 224, 224), device="cpu")

Layer (type:depth-idx)                   Output Shape              Param #
VGG                                      [2, 1000]                 --
├─Sequential: 1-1                        [2, 512, 7, 7]            --
│    └─Conv2d: 2-1                       [2, 64, 224, 224]         1,792
│    └─ReLU: 2-2                         [2, 64, 224, 224]         --
│    └─Conv2d: 2-3                       [2, 64, 224, 224]         36,928
│    └─ReLU: 2-4                         [2, 64, 224, 224]         --
│    └─MaxPool2d: 2-5                    [2, 64, 112, 112]         --
│    └─Conv2d: 2-6                       [2, 128, 112, 112]        73,856
│    └─ReLU: 2-7                         [2, 128, 112, 112]        --
│    └─Conv2d: 2-8                       [2, 128, 112, 112]        147,584
│    └─ReLU: 2-9                         [2, 128, 112, 112]        --
│    └─MaxPool2d: 2-10                   [2, 128, 56, 56]          --
│    └─Conv2d: 2-11                      [2, 256, 56, 56]          29

### Train

In [40]:
criterion = torch.nn.CrossEntropyLoss()  # loss function

# Every batch is moved to `device` inside the loop, so the model's parameters
# have to live on the same device; otherwise the forward pass fails as soon
# as a GPU is available. Move the model before the optimizer is created.
model = model.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

num_epochs = 10
log_every = 1   # record the loss every `log_every` epochs
loss_arr = []   # mean training loss (float) of each recorded epoch

model.train()  # make sure dropout is active while training

for epoch in trange(num_epochs):
    running_loss = 0.0
    num_batches = 0

    for image, label in trainloader:
        x = image.to(device)
        y_ = label.to(device)

        optimizer.zero_grad()
        output = model(x)
        loss = criterion(output, y_)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Report the epoch mean rather than the last mini-batch loss: a single
    # 4-sample batch is too noisy to say anything about training progress.
    if epoch % log_every == 0 and num_batches > 0:
        epoch_loss = running_loss / num_batches
        print(f"epoch {epoch + 1}/{num_epochs} - mean loss: {epoch_loss:.4f}")
        loss_arr.append(epoch_loss)


  0%|          | 0/10 [07:22<?, ?it/s]


KeyboardInterrupt: 