In [8]:
import pandas as pd
from tqdm import tqdm

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms


# Fix every source of randomness so that re-running the notebook gives the same results.
import random
import numpy as np

SEED = 0  # single place to change the seed used by all libraries below

random.seed(SEED)                 # Python's built-in RNG
np.random.seed(SEED)              # NumPy's legacy global RNG
torch.manual_seed(SEED)           # PyTorch RNG
torch.cuda.manual_seed(SEED)      # current CUDA device (ignored when CUDA is unavailable)
torch.cuda.manual_seed_all(SEED)  # every CUDA device

# Seeding alone does not make GPU runs repeatable: cuDNN may select
# non-deterministic convolution algorithms and, with benchmarking enabled,
# may pick a different algorithm from run to run. Request deterministic
# algorithms and disable the auto-tuner. This can cost some training speed.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
# Shallow ResNet layout: plain residual blocks with skip connections (no bottleneck
# blocks), organised as stage1-stage4 with two blocks per stage.
class MyModel(nn.Module):
    """ResNet-style image classifier with a stem adapted to 32 x 32 inputs.

    Layout: a 3x3 convolution stem, four stages of two residual blocks each
    (64, 128, 256 and 512 channels) and a pooled linear classifier. The first
    block of stages 2-4 uses stride 2 and changes the channel count, so the skip
    path of that block goes through a 1x1 stride-2 projection.

    Args:
        in_channels: number of channels of the input image tensor.
        num_classes: size of the output logit vector.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()

        # Stem. The usual ResNet stem for 224 x 224 inputs (7x7 stride-2 conv,
        # BatchNorm, ReLU, 3x3 stride-2 max-pool) is replaced by a single 3x3
        # stride-1 conv without pooling, because pooling a 32 x 32 input throws
        # away too much information.
        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.act = nn.ReLU(inplace=True)

        # Stage 1 keeps both resolution and width, so no projection is needed.
        self.stage1 = self._make_stage(64, 64, first_stride=1)

        # Stages 2-4 follow the ResNet rule: the first block of a stage halves
        # the resolution (stride 2) and widens the channels; the matching
        # projection brings the skip path to the same shape.
        self.stage2 = self._make_stage(64, 128, first_stride=2)
        self.shortcut64_128 = self._make_projection(64, 128)

        self.stage3 = self._make_stage(128, 256, first_stride=2)
        self.shortcut128_256 = self._make_projection(128, 256)

        self.stage4 = self._make_stage(256, 512, first_stride=2)
        self.shortcut256_512 = self._make_projection(256, 512)

        # Classification head.
        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),  # collapse the feature map to 1 x 1
            nn.Flatten(),                  # -> flat feature vector
            nn.Dropout(0.2),               # light regularisation for generalisation
            nn.Linear(512, num_classes),
        )

    @staticmethod
    def _make_block(in_ch, out_ch, stride):
        """Build one residual body: conv -> BN -> ReLU -> conv -> BN (3x3 kernels)."""
        return nn.Sequential(
            nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_ch),
        )

    @classmethod
    def _make_stage(cls, in_ch, out_ch, first_stride):
        """Build a two-block stage; only the first block changes width/stride."""
        return nn.ModuleList([
            cls._make_block(in_ch, out_ch, first_stride),
            cls._make_block(out_ch, out_ch, 1),
        ])

    @staticmethod
    def _make_projection(in_ch, out_ch):
        """Build the 1x1 stride-2 conv + BN used on the skip path when shapes change."""
        return nn.Sequential(
            nn.Conv2d(in_ch, out_ch, kernel_size=1, stride=2),
            nn.BatchNorm2d(out_ch),
        )

    def _run_stage(self, x, stage, projection=None):
        """Apply every block of ``stage`` with its skip connection.

        The ReLU is applied after the residual sum. ``projection``, when given,
        is applied to the skip path of the first block only.
        """
        for index, block in enumerate(stage):
            skip = x
            if index == 0 and projection is not None:
                skip = projection(skip)
            x = self.act(block(x) + skip)
        return x

    def forward(self, x):
        """Return the class logits produced by stem, stages 1-4 and classifier."""
        x = self.act(self.bn1(self.conv1(x)))  # stem

        x = self._run_stage(x, self.stage1)
        # From stage 2 on the channel count changes, so the skip path is projected.
        x = self._run_stage(x, self.stage2, self.shortcut64_128)
        x = self._run_stage(x, self.stage3, self.shortcut128_256)
        x = self._run_stage(x, self.stage4, self.shortcut256_512)

        return self.classifier(x)

In [11]:
# Use the GPU when one is available and fall back to the CPU otherwise, so this
# cell does not crash on a machine without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# MyModel(in_channels, num_classes): 3 input channels, 100 output classes.
model = MyModel(3, 100).to(device)
model

MyModel(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act): ReLU(inplace=True)
  (stage1): ModuleList(
    (0-1): 2 x Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (stage2): ModuleList(
    (0): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): BatchNorm2d(128, eps=1e-05, momentum

In [None]:
# Settings shared by the train and test data loaders.
DATA_ROOT = "./"
loader_options = dict(batch_size=64, num_workers=2)

# Training-time augmentation: pad 4 pixels on every side and take a random
# 32 x 32 crop, then flip horizontally with probability 0.5.
train_transform = transforms.Compose([
    transforms.RandomCrop(size=32, padding=4),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
])

# The test images are only converted to tensors (no augmentation).
test_transform = transforms.Compose([transforms.ToTensor()])

# CIFAR-100 train / test splits, downloaded into DATA_ROOT when missing.
train = torchvision.datasets.CIFAR100(
    root=DATA_ROOT, train=True, download=True, transform=train_transform
)
test = torchvision.datasets.CIFAR100(
    root=DATA_ROOT, train=False, download=True, transform=test_transform
)

# Only the training data is shuffled.
train_loader = torch.utils.data.DataLoader(train, shuffle=True, **loader_options)
test_loader = torch.utils.data.DataLoader(test, shuffle=False, **loader_options)

# Number of training epochs.
epoch_num = 50

# SGD with momentum and L2 regularisation (weight decay).
optimizer = optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=0.0005,
)

# Cosine learning-rate schedule whose period matches the number of epochs.
lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epoch_num)

criterion = nn.CrossEntropyLoss()


In [None]:
# Send batches to whatever device the model lives on instead of hard-coding
# .cuda(), so the loop runs unchanged on GPU or CPU.
device = next(model.parameters()).device

for epoch in range(epoch_num):
    # ---- one pass over the training set ----
    model.train()
    for img, label in tqdm(train_loader):
        img = img.to(device)
        label = label.to(device)

        optimizer.zero_grad()
        output = model(img)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()

    # The learning-rate schedule advances once per epoch, after the optimizer steps.
    lr_scheduler.step()

    # ---- accuracy on the test loader ----
    model.eval()
    correct, all_data = 0, 0
    # Gradients are not needed for evaluation; disable them once for the whole loop.
    with torch.no_grad():
        for img, label in test_loader:
            img = img.to(device)
            label = label.to(device)
            output = model(img)

            # Count samples whose highest-scoring class equals the label.
            correct += (output.argmax(dim=1) == label).sum().item()
            all_data += len(label)
    print("acc : ", correct / all_data, " epoch : ", epoch)

100%|██████████| 782/782 [00:40<00:00, 19.11it/s]


acc :  0.1876


100%|██████████| 782/782 [00:41<00:00, 18.65it/s]


acc :  0.2951


100%|██████████| 782/782 [00:41<00:00, 18.77it/s]


acc :  0.3947


100%|██████████| 782/782 [00:41<00:00, 18.66it/s]


acc :  0.4247


100%|██████████| 782/782 [00:41<00:00, 18.70it/s]


acc :  0.4942


100%|██████████| 782/782 [00:41<00:00, 18.77it/s]


acc :  0.5087


100%|██████████| 782/782 [00:41<00:00, 18.71it/s]


acc :  0.5474


100%|██████████| 782/782 [00:41<00:00, 18.74it/s]


acc :  0.5703


100%|██████████| 782/782 [00:41<00:00, 18.74it/s]


acc :  0.5756


100%|██████████| 782/782 [00:41<00:00, 18.66it/s]


acc :  0.5774


100%|██████████| 782/782 [00:41<00:00, 18.73it/s]


acc :  0.6184


100%|██████████| 782/782 [00:41<00:00, 18.75it/s]


acc :  0.6199


100%|██████████| 782/782 [00:41<00:00, 18.68it/s]


acc :  0.634


100%|██████████| 782/782 [00:41<00:00, 18.73it/s]


acc :  0.6344


100%|██████████| 782/782 [00:41<00:00, 18.70it/s]


acc :  0.6431


100%|██████████| 782/782 [00:41<00:00, 18.70it/s]


acc :  0.6438


100%|██████████| 782/782 [00:41<00:00, 18.76it/s]


acc :  0.6525


100%|██████████| 782/782 [00:41<00:00, 18.74it/s]


acc :  0.6714


100%|██████████| 782/782 [00:41<00:00, 18.72it/s]


acc :  0.6705


100%|██████████| 782/782 [00:41<00:00, 18.77it/s]


acc :  0.6736


100%|██████████| 782/782 [00:41<00:00, 18.74it/s]


acc :  0.6829


100%|██████████| 782/782 [00:41<00:00, 18.71it/s]


acc :  0.6867


100%|██████████| 782/782 [00:41<00:00, 18.74it/s]


acc :  0.6868


100%|██████████| 782/782 [00:41<00:00, 18.76it/s]


acc :  0.7004


100%|██████████| 782/782 [00:41<00:00, 18.71it/s]


acc :  0.6925


100%|██████████| 782/782 [00:41<00:00, 18.71it/s]


acc :  0.7056


100%|██████████| 782/782 [00:41<00:00, 18.77it/s]


acc :  0.6966


100%|██████████| 782/782 [00:41<00:00, 18.63it/s]


acc :  0.7123


100%|██████████| 782/782 [00:41<00:00, 18.75it/s]


acc :  0.7193


100%|██████████| 782/782 [00:41<00:00, 18.74it/s]


acc :  0.7118


100%|██████████| 782/782 [00:41<00:00, 18.69it/s]


acc :  0.7219


100%|██████████| 782/782 [00:41<00:00, 18.75it/s]


acc :  0.7246


100%|██████████| 782/782 [00:41<00:00, 18.70it/s]


acc :  0.7273


100%|██████████| 782/782 [00:41<00:00, 18.71it/s]


acc :  0.7336


100%|██████████| 782/782 [00:41<00:00, 18.75it/s]


acc :  0.7338


100%|██████████| 782/782 [00:41<00:00, 18.74it/s]


acc :  0.7349


100%|██████████| 782/782 [00:41<00:00, 18.72it/s]


acc :  0.7393


100%|██████████| 782/782 [00:41<00:00, 18.70it/s]


acc :  0.7438


100%|██████████| 782/782 [00:41<00:00, 18.75it/s]


acc :  0.7425


100%|██████████| 782/782 [00:41<00:00, 18.71it/s]


acc :  0.7427


100%|██████████| 782/782 [00:41<00:00, 18.73it/s]


acc :  0.7466


100%|██████████| 782/782 [00:41<00:00, 18.77it/s]


acc :  0.7484


100%|██████████| 782/782 [00:41<00:00, 18.68it/s]


acc :  0.7472


100%|██████████| 782/782 [00:41<00:00, 18.73it/s]


acc :  0.7486


100%|██████████| 782/782 [00:41<00:00, 18.75it/s]


acc :  0.7479


100%|██████████| 782/782 [00:41<00:00, 18.71it/s]


acc :  0.7493


100%|██████████| 782/782 [00:41<00:00, 18.71it/s]


acc :  0.7496


100%|██████████| 782/782 [00:41<00:00, 18.78it/s]


acc :  0.7497


100%|██████████| 782/782 [00:41<00:00, 18.70it/s]


acc :  0.7471


100%|██████████| 782/782 [00:41<00:00, 18.75it/s]


acc :  0.7487


In [15]:
# Predict a class for every test image and write the predictions to a CSV file.
model.eval()
# Run on the device the model already lives on rather than assuming CUDA.
device = next(model.parameters()).device
preds = []

with torch.no_grad():
    # Labels are not needed for prediction, so they are neither moved nor used.
    for img, _ in tqdm(test_loader):
        pred = model(img.to(device))

        # Highest-scoring class per sample, collected as plain Python ints.
        pred = torch.argmax(pred, dim=1)
        preds.extend(pred.cpu().tolist())

# 'id' is the position of the sample in test_loader order (the loader is not shuffled).
df = pd.DataFrame({'id': list(range(len(preds))),
                   'class': preds})
df.to_csv("20212564_resnet2222_change_base_+relu.csv", index=False)

100%|██████████| 157/157 [00:03<00:00, 49.29it/s]
