In [1]:
# Render matplotlib figures inline in the notebook output.
# NOTE(review): no plotting call appears in the visible cells, so this magic
# may be unnecessary - confirm before removing.
%matplotlib inline

# Pre-Trained AlexNet/VGG/ResNet/DenseNet 성능 비교하기

### Import Libraries

In [2]:
import torch
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
import torch.nn as nn

### Load Pretrained models

In [3]:
# Mini-batch size shared by both DataLoaders built later in the notebook.
batch_size = 128

# Class names for the 10-way classification task. In the visible cells only
# len(classes) is used, to size the replacement classifier heads.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

# Run on the first CUDA GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [4]:
# Load AlexNet with pretrained weights, then swap the last classifier layer
# for a freshly created Linear layer with one output per entry in `classes`.
alexnet = models.alexnet(pretrained=True)

# Input width of the existing final layer; the replacement must match it.
num_ftrs = alexnet.classifier[-1].in_features
alexnet.classifier[-1] = nn.Linear(in_features=num_ftrs, out_features=len(classes))

# Module.to() moves the parameters in place and returns the same module,
# so re-binding the name is not required.
alexnet.to(device)
print(alexnet)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [5]:
# Load VGG16 with pretrained weights, then swap the last classifier layer
# for a freshly created Linear layer with one output per entry in `classes`.
vgg16 = models.vgg16(pretrained=True)

# Input width of the existing final layer; the replacement must match it.
num_ftrs = vgg16.classifier[-1].in_features
vgg16.classifier[-1] = nn.Linear(in_features=num_ftrs, out_features=len(classes))

# Module.to() moves the parameters in place and returns the same module,
# so re-binding the name is not required.
vgg16.to(device)
print(vgg16)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [6]:
# Load ResNet-18 with pretrained weights, then swap its `fc` layer for a
# freshly created Linear layer with one output per entry in `classes`.
resnet18 = models.resnet18(pretrained=True)

# Input width of the existing `fc` layer; the replacement must match it.
num_ftrs = resnet18.fc.in_features
resnet18.fc = nn.Linear(in_features=num_ftrs, out_features=len(classes))

# Module.to() moves the parameters in place and returns the same module,
# so re-binding the name is not required.
resnet18.to(device)
print(resnet18)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [7]:
# Load DenseNet-161 with pretrained weights, then swap its `classifier` layer
# for a freshly created Linear layer with one output per entry in `classes`.
densenet = models.densenet161(pretrained=True)

# Input width of the existing classifier; the replacement must match it.
num_ftrs = densenet.classifier.in_features
densenet.classifier = nn.Linear(num_ftrs, len(classes))
densenet = densenet.to(device)

# Print the architecture once, after the head swap, like the other model
# cells do. The earlier pre-swap print only duplicated a very long dump.
print(densenet)

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(96, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(192, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (rel

### Load CIFAR-10 Data

In [8]:
# Preprocessing pipelines, keyed by split name.
# Both end with ToTensor + Normalize(mean, std); these must come after any
# augmentation step.
# NOTE(review): the mean/std values are presumably the ImageNet channel
# statistics the pretrained weights expect - confirm.
data_transforms = dict(
    train=transforms.Compose([
        transforms.RandomResizedCrop(224),    # data augmentation: random crop, resized to 224
        transforms.RandomHorizontalFlip(),    # data augmentation: left-right flip
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),   # mean, std
    ]),
    val=transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),   # mean, std
    ]),
)

# NOTE(review): both splits use the 'val' pipeline, so the 'train' pipeline
# above is defined but unused. Presumably deliberate, since this notebook only
# runs inference - confirm.
train_set = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=data_transforms['val'])
test_set = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=data_transforms['val'])

# DataLoaders split each dataset into batches (and shuffle it when asked)
# while iterating during training or inference.
dataloaders = {
    'train': torch.utils.data.DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True),
    'val': torch.utils.data.DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False),
}

Files already downloaded and verified
Files already downloaded and verified


In [9]:
def infer_model(model, criterion, loaders=None):
    """Evaluate ``model`` on each data split and print its accuracy per split.

    Args:
        model: ``torch.nn.Module`` to evaluate. It is switched to eval mode.
        criterion: Loss function. Kept so existing calls still work; the loss
            was computed but never reported, so it is no longer evaluated.
        loaders: Optional mapping of split name to an iterable of
            ``(images, labels)`` batches. Defaults to the notebook-level
            ``dataloaders`` dict.

    Returns:
        dict mapping each split name to its accuracy in percent (float).
        A split with no samples reports 0.0.
    """
    print('>>', model.__class__.__name__)
    model.eval()

    if loaders is None:
        loaders = dataloaders

    # Feed batches to whatever device holds the model's weights; fall back to
    # the CPU for a module that has no parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    accuracies = {}
    with torch.no_grad():
        for phase, dataloader in loaders.items():
            # Reset the counters for every split. Without this, a later
            # split's accuracy would also include the earlier splits' samples.
            total = 0
            correct = 0

            for images, labels in dataloader:
                images = images.to(target_device)
                labels = labels.to(target_device)

                outputs = model(images)
                # Predicted class = index of the largest output along dim 1.
                _, pred = torch.max(outputs, 1)

                total += labels.size(0)
                correct += (pred == labels).sum().item()

            # Guard against an empty split to avoid ZeroDivisionError.
            accuracy = 100 * correct / total if total else 0.0
            accuracies[phase] = accuracy
            print('Accuracy of the network on the %s images: %d %%' % (phase, accuracy))

    return accuracies

In [10]:
# Set the loss function (cross-entropy) that is passed to infer_model for each model.
criterion = nn.CrossEntropyLoss()

In [None]:
# Do not call this list `models`: that name is bound to the `torchvision.models`
# module by the import cell. Rebinding it to a list would make any later
# `models.alexnet(...)` style call fail when a load cell is re-run.
pretrained_models = [alexnet, vgg16, resnet18, densenet]

# Evaluate every model on all data splits.
for model in pretrained_models:
    infer_model(model, criterion)

>> AlexNet


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Accuracy of the network on the train images: 8 %
Accuracy of the network on the val images: 8 %
>> VGG
Accuracy of the network on the train images: 10 %
Accuracy of the network on the val images: 10 %
>> ResNet
Accuracy of the network on the train images: 8 %
Accuracy of the network on the val images: 8 %
>> DenseNet


### 정리
- PyTorch에서 ImageNet 데이터를 제공하지 않아 대신 CIFAR-10을 이용해서 Inference를 진행했습니다.  
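- Pretrained model이 CIFAR-10이 아닌 ImageNet 데이터로 학습되었다 보니, Inference 정확도가 그렇게 높지 않음을 알 수 있습니다. 특히 마지막 분류 layer를 CIFAR-10의 10개 class에 맞춰 새로 만든(아직 학습되지 않은) `nn.Linear`로 교체했기 때문에, 정확도가 무작위 추측 수준(약 10%)에 머무릅니다. 따라서 정확도를 높이기 위해서는 Transfer Learning이 필요할 것 같습니다.  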
- 다음 과제에서 Transfer Learning에 대한 내용을 공부하므로 이 과제에서는 따로 하지는 않았고, Pretrain된 모델을 불러오고 Infer할 수 있음에 의의를 두었습니다.