# ResNet-50을 활용하자


In [1]:
import random
import time
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim

from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

from PIL import Image
from tqdm import tqdm

plt.rcParams["font.family"] = "Malgun Gothic"
plt.rcParams["axes.unicode_minus"] = False

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

random.seed(42)

cpu


In [2]:
class ImageTransform:
    def __init__(self, resize, mean, std):
        self.data_transform = {
            "train": transforms.Compose([
                transforms.Resize((resize, resize)),
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
                ]),
            "val": transforms.Compose([
                transforms.Resize((resize, resize)),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
                ]),
            "test": transforms.Compose([
                transforms.Resize((resize, resize)),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
                ]),
        }
    def __call__(self, img, phase):
        return self.data_transform[phase](img)

In [3]:
class DogVsCatDataset(Dataset):
    def __init__(self, file_list, transform=None, phase="train"):
        self.file_list = file_list
        self.transform = transform
        self.phase = phase

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, index):
        img_path = self.file_list[index]
        img = Image.open(img_path).convert("RGB")
        img_transform = self.transform(img, self.phase)
        if "dog" in img_path.lower():
            label = 1
        else:
            label = 0
        return img_transform, label            

In [4]:
cat_directory = Path("data/catanddog/train/Cat")
dog_directory = Path("data/catanddog/train/Dog")
cat_images_filepaths = sorted([str(p) for p in cat_directory.glob("*.jpg")])
dog_images_filepaths = sorted([str(p) for p in dog_directory.glob("*.jpg")])
images_filepath = cat_images_filepaths + dog_images_filepaths
random.shuffle(images_filepath)

In [5]:
total_images = len(images_filepath)
train_size = int(total_images * 0.7)
val_size = int(total_images * 0.2)
test_size = total_images - (train_size + val_size)

In [6]:
train_images_filepaths = images_filepath[:train_size]
val_images_filepaths = images_filepath[train_size: train_size + val_size]
test_images_filepaths = images_filepath[train_size + val_size: train_size + val_size + test_size]

In [7]:
size = 224
mean= (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
batch_size = 32

In [8]:
train_dataset = DogVsCatDataset(train_images_filepaths, 
                                transform=ImageTransform(size, mean, std), 
                                phase="train")
val_dataset = DogVsCatDataset(val_images_filepaths, 
                              transform=ImageTransform(size, mean, std), 
                              phase="val")
test_dataset = DogVsCatDataset(test_images_filepaths, 
                               transform=ImageTransform(size, mean, std), 
                               phase="test")

In [9]:
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

In [10]:
dataloader_dic = {
    "train": train_dataloader,
    "val": val_dataloader,
    "test": test_dataloader
}

In [None]:
# batch_size 는 일반적으로 32, 64로 한다. 

In [11]:
# 6@28x28(C) -> 6@14x14(S) -> 16@10x10 -> 16@5x5 (특성맵 => OOPool)
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.c1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, stride=1, padding=0)
        self.s2 = nn.AvgPool2d(kernel_size=2, stride=2) 
        self.c3 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0)
        self.s4 = nn.AvgPool2d(kernel_size=2, stride=2) # 16x53
        self.f5 = nn.Linear(53*53*16, 120)
        self.f6 = nn.Linear(120, 64)
        self.output = nn.Linear(64, 2)

    def forward(self, x):
        x = torch.tanh(self.c1(x))
        x = self.s2(x)
        x = torch.tanh(self.c3(x))
        x = self.s4(x)        
        x = x.view(x.size(0), -1)
        x = torch.tanh(self.f5(x))
        x = torch.tanh(self.f6(x))
        x = self.output(x)

        return x

In [12]:
def train_model(model, dataloader_dic, criterion, optimizer, num_epoch):
    since = time.time()
    best_acc = 0.0

    for epoch in range(num_epoch):
        print(f"Epoch: {epoch + 1} / {num_epoch}")
        print("-" * 20)
        
        for phase in ["train", "val"]:
            if phase == "train":
                model.train()
            else:
                model.eval()            
            epoch_loss = 0.0
            epoch_corrects = 0
            
            for inputs, labels in tqdm(dataloader_dic[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == "train"):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    if phase == "train":
                        loss.backward()
                        optimizer.step()                    
                    epoch_loss += loss.item() * inputs.size(0)
                    epoch_corrects += torch.sum(preds == labels.data)
            
            epoch_loss = epoch_loss / len(dataloader_dic[phase].dataset)
            epoch_acc = epoch_corrects.double() / len(dataloader_dic[phase].dataset)            
            print(f"loss: {epoch_loss}, acc: {epoch_acc}")

            if phase == "val" and epoch_acc > best_acc:
                best_acc = epoch_acc
                bst_model_wts = model.state_dict() # 최적의 매개변수를 저장
    time_elapsed = time.time() - since
    print(f"best acc: {best_acc}, end: {time_elapsed % 60}s")
    return model

In [13]:
model = LeNet5()
model = model.to(device)
optimizer = optim.SGD(model.parameters(), lr=0.002, momentum=0.9)
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)
num_epoch = 10
model = train_model(model, dataloader_dic, criterion, optimizer, num_epoch)

Epoch: 1 / 10
--------------------


100%|██████████| 9/9 [00:02<00:00,  3.96it/s]


loss: 0.5604471714966359, acc: 0.8066914498141264


100%|██████████| 3/3 [00:00<00:00,  7.55it/s]


loss: 0.29351162097670813, acc: 1.0
Epoch: 2 / 10
--------------------


100%|██████████| 9/9 [00:01<00:00,  4.73it/s]


loss: 0.15418655716132054, acc: 1.0


100%|██████████| 3/3 [00:00<00:00,  9.74it/s]


loss: 0.06036133435252425, acc: 1.0
Epoch: 3 / 10
--------------------


100%|██████████| 9/9 [00:01<00:00,  4.72it/s]


loss: 0.042593045748742536, acc: 1.0


100%|██████████| 3/3 [00:00<00:00,  9.55it/s]


loss: 0.025895379217607633, acc: 1.0
Epoch: 4 / 10
--------------------


100%|██████████| 9/9 [00:02<00:00,  4.31it/s]


loss: 0.021153735460845067, acc: 1.0


100%|██████████| 3/3 [00:00<00:00,  9.77it/s]


loss: 0.016202409763808375, acc: 1.0
Epoch: 5 / 10
--------------------


100%|██████████| 9/9 [00:01<00:00,  4.71it/s]


loss: 0.014314236271724824, acc: 1.0


100%|██████████| 3/3 [00:00<00:00, 10.04it/s]


loss: 0.012187413919668693, acc: 1.0
Epoch: 6 / 10
--------------------


100%|██████████| 9/9 [00:01<00:00,  4.74it/s]


loss: 0.011202693536019015, acc: 1.0


100%|██████████| 3/3 [00:00<00:00, 10.05it/s]


loss: 0.010017785001788047, acc: 1.0
Epoch: 7 / 10
--------------------


100%|██████████| 9/9 [00:01<00:00,  4.77it/s]


loss: 0.009401413224305141, acc: 1.0


100%|██████████| 3/3 [00:00<00:00,  9.77it/s]


loss: 0.008633067686255876, acc: 1.0
Epoch: 8 / 10
--------------------


100%|██████████| 9/9 [00:01<00:00,  4.87it/s]


loss: 0.008203203097456672, acc: 1.0


100%|██████████| 3/3 [00:00<00:00,  9.71it/s]


loss: 0.007644314070945823, acc: 1.0
Epoch: 9 / 10
--------------------


100%|██████████| 9/9 [00:01<00:00,  4.76it/s]


loss: 0.007311759080363915, acc: 1.0


100%|██████████| 3/3 [00:00<00:00, 10.01it/s]


loss: 0.006885495980258112, acc: 1.0
Epoch: 10 / 10
--------------------


100%|██████████| 9/9 [00:01<00:00,  4.75it/s]


loss: 0.006618991045828313, acc: 1.0


100%|██████████| 3/3 [00:00<00:00,  8.33it/s]

loss: 0.006274312607605349, acc: 1.0
best acc: 1.0, end: 22.742528915405273s



