# VGG

`torchvision`에 있는 `VGG`를 참고해서 VGG Net을 직접 구현해보자!

그리고 이를 cifar10 dataset에 적용해보자!

## 1. VGG Network

In [2]:
import torch
from torch import nn

In [3]:
class VGG(nn.Module):
    """VGG-style classifier: a feature extractor followed by a 3-layer MLP head.

    Args:
        features: Module applied first to the input batch. Its output is
            adaptively average-pooled to 7x7 and flattened, so it must
            produce 512 channels to match the first linear layer
            (512 * 7 * 7 inputs).
        num_classes: Width of the final linear layer (number of logits).
        init_weights: When true, re-initialise every Conv2d, BatchNorm2d and
            Linear submodule via ``_initialize_weights``.
    """

    def __init__(self, features, num_classes=1000, init_weights=True):
        super().__init__()
        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

        flat_dim = 512 * 7 * 7
        hidden_dim = 4096
        head = [
            nn.Linear(flat_dim, hidden_dim),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(hidden_dim, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return the classifier output for the batch ``x``."""
        pooled = self.avgpool(self.features(x))
        # Keep dim 0 (batch) and collapse everything after it.
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

    def _initialize_weights(self):
        """Re-initialise conv, batch-norm and linear submodules in place."""
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                nn.init.kaiming_normal_(
                    layer.weight, mode='fan_out', nonlinearity='relu')
                if layer.bias is not None:
                    nn.init.constant_(layer.bias, 0)
                continue

            if isinstance(layer, nn.BatchNorm2d):
                # Scale 1 / shift 0.
                nn.init.constant_(layer.weight, 1)
                nn.init.constant_(layer.bias, 0)
                continue

            if isinstance(layer, nn.Linear):
                # Normal(mean=0, std=0.01) weights, zero bias.
                nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.constant_(layer.bias, 0)

In [4]:
def make_layers(cfg, batch_norm=False, in_channels=3):
    """Build the convolutional feature extractor described by ``cfg``.

    Args:
        cfg: Iterable of layer specs. The string ``'M'`` adds a 2x2 max-pool
            with stride 2; any other entry is used as the output channel
            count of a 3x3 convolution (padding 1) followed by ReLU.
        batch_norm: When true, insert ``BatchNorm2d`` between each
            convolution and its ReLU.
        in_channels: Channel count of the tensor fed to the first
            convolution. Defaults to 3, which was previously hard-coded.

    Returns:
        ``nn.Sequential`` containing the layers in ``cfg`` order.
    """
    layers = []
    for v in cfg:
        if v == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue

        layers.append(nn.Conv2d(in_channels, v, kernel_size=3, padding=1))
        if batch_norm:
            layers.append(nn.BatchNorm2d(v))
        layers.append(nn.ReLU(inplace=True))
        # The next convolution consumes what this one produced.
        in_channels = v
    return nn.Sequential(*layers)

In [5]:
# Layer plan, one row per stage: integers are convolution output channel
# counts and 'M' marks a max-pooling step.
cfg = [
    32, 32, 'M',
    64, 64, 128, 128, 128, 'M',
    256, 256, 256, 512, 512, 512, 'M',
]

## 2. CIFAR-10 Dataset

In [6]:
# Run on the GPU when CUDA is usable, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Fixed seeds so that weight initialisation and sampling are repeatable.
torch.manual_seed(777)
if device != 'cpu':
    torch.cuda.manual_seed_all(777)

In [7]:
# --- Data pipeline settings ---
batch_size = 128        # samples per mini-batch handed to every DataLoader
validation_ratio = 0.2  # fraction of the training indices held out for validation
random_seed = 10        # NumPy seed used when shuffling the dataset indices

# --- Optimisation settings ---
# NOTE(review): by name these are the starting learning rate and the number
# of training epochs; confirm the optimizer/training cells actually read them.
initial_lr = 0.005
num_epoch = 30

In [8]:
import numpy as np
import torchvision
from torch.utils.data.sampler import SubsetRandomSampler

# Convert images to tensors, then normalise each of the three channels with
# mean 0.5 and std 0.5.
to_tensor = torchvision.transforms.ToTensor()
normalize = torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
transform = torchvision.transforms.Compose([to_tensor, normalize])

# Arguments shared by all three dataset objects.
cifar_kwargs = dict(
    root='../datasets/CIFAR10_dataset', download=True, transform=transform)

# trainset and validset both wrap the CIFAR-10 training split; which samples
# each one actually serves is decided by the samplers built below.
trainset = torchvision.datasets.CIFAR10(train=True, **cifar_kwargs)
validset = torchvision.datasets.CIFAR10(train=True, **cifar_kwargs)
testset = torchvision.datasets.CIFAR10(train=False, **cifar_kwargs)

# Shuffle all training indices once (seeded) and carve off the first
# `split` of them for validation; the remainder is used for training.
num_train = len(trainset)
split = int(np.floor(validation_ratio * num_train))
indices = list(range(num_train))

np.random.seed(random_seed)
np.random.shuffle(indices)

valid_idx = indices[:split]
train_idx = indices[split:]
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# Arguments shared by every loader.
loader_kwargs = dict(batch_size=batch_size, num_workers=0)

train_loader = torch.utils.data.DataLoader(
    trainset, sampler=train_sampler, **loader_kwargs)
valid_loader = torch.utils.data.DataLoader(
    validset, sampler=valid_sampler, **loader_kwargs)
test_loader = torch.utils.data.DataLoader(
    testset, shuffle=False, **loader_kwargs)

# Human-readable label names; their count is the classifier output width.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)
num_classes = len(classes)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


## 3. 학습

In [9]:
# Build the convolutional stack from cfg (make_layers' default: no batch
# norm), wrap it in the VGG classifier sized for the dataset's classes, and
# move the model to the selected device.
feature_extractor = make_layers(cfg)
vgg16 = VGG(feature_extractor, num_classes=num_classes, init_weights=True)
vgg16 = vgg16.to(device)

In [10]:
from torch import optim

# Cross-entropy loss over the class logits produced by the model.
criterion = nn.CrossEntropyLoss().to(device)

# SGD with momentum. The learning rate is taken from the `initial_lr`
# hyperparameter instead of repeating the literal, so changing the
# hyperparameter cell actually changes training.
optimizer = optim.SGD(vgg16.parameters(), lr=initial_lr, momentum=0.9)

# Lower the learning rate when the value passed to scheduler.step() has not
# decreased ('min' mode) for more than `patience` epochs.
# NOTE(review): `verbose` is reported as deprecated in recent PyTorch
# releases; confirm against the installed version before upgrading.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=10, verbose=True)

In [11]:
# Number of passes over the training data, taken from the hyperparameter
# cell rather than a second hard-coded literal.
epochs = num_epoch

for epoch in range(epochs):
    # ---- Training ----
    vgg16.train()  # training mode for dropout
    running_loss = 0.0
    num_train_batches = len(train_loader)
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = vgg16(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # .item() yields a plain Python float; adding the loss tensor itself
        # would keep every batch's autograd history alive for the whole epoch.
        running_loss += loss.item() / num_train_batches

    print('[Train] Epoch %3d/%d, Loss: %.7f' % (epoch+1, epochs, running_loss))

    # ---- Validation ----
    vgg16.eval()  # evaluation mode for dropout
    val_loss = 0.0
    correct = 0
    total = 0
    num_valid_batches = len(valid_loader)
    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = vgg16(inputs)
            # Mean of the per-batch mean losses, accumulated as a float.
            val_loss += criterion(outputs, labels).item() / num_valid_batches

            # Index of the largest logit is the predicted class.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('[Valid] Epoch %3d/%d, Loss: %.7f, Accuracy: %.1f%%' %
          (epoch+1, epochs, val_loss, (100*correct / total))
        )

    # The plateau scheduler monitors the validation loss.
    scheduler.step(val_loss)

[Train] Epoch   1/30, Loss: 2.0440755
[Valid] Epoch   1/30, Loss: 1.7301058, Accuracy: 34.8%
[Train] Epoch   2/30, Loss: 1.5856618
[Valid] Epoch   2/30, Loss: 1.5067730, Accuracy: 44.0%
[Train] Epoch   3/30, Loss: 1.3876722
[Valid] Epoch   3/30, Loss: 1.4067990, Accuracy: 51.4%
[Train] Epoch   4/30, Loss: 1.2109097
[Valid] Epoch   4/30, Loss: 1.1614313, Accuracy: 58.2%
[Train] Epoch   5/30, Loss: 1.0631638
[Valid] Epoch   5/30, Loss: 1.0294256, Accuracy: 62.8%
[Train] Epoch   6/30, Loss: 0.9365886
[Valid] Epoch   6/30, Loss: 0.9183221, Accuracy: 67.1%
[Train] Epoch   7/30, Loss: 0.8514708
[Valid] Epoch   7/30, Loss: 0.8347962, Accuracy: 70.6%
[Train] Epoch   8/30, Loss: 0.7615770
[Valid] Epoch   8/30, Loss: 0.7613340, Accuracy: 73.7%
[Train] Epoch   9/30, Loss: 0.6800954
[Valid] Epoch   9/30, Loss: 0.8201019, Accuracy: 72.0%
[Train] Epoch  10/30, Loss: 0.6111642
[Valid] Epoch  10/30, Loss: 0.6955789, Accuracy: 76.3%
[Train] Epoch  11/30, Loss: 0.5522207
[Valid] Epoch  11/30, Loss: 0.68

In [12]:
# Count correct top-1 predictions over the whole test loader.
correct = 0
total = 0

vgg16.eval()  # evaluation mode for dropout
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = vgg16(images)

        # Index of the largest logit is the predicted class. Gradients are
        # already disabled here, so the legacy `.data` access is not needed.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report the number of images actually evaluated instead of a fixed count.
print('Accuracy of the network on the %d test images: %.1f %%' % (
    total, 100 * correct / total))

Accuracy of the network on the 10000 test images: 82.1 %
