In [8]:
# 내가 만든 모델

# Load Module
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import numpy as np
import os
import matplotlib.pyplot as plt

from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import ImageFolder
from torchvision.models import resnet18, ResNet18_Weights
from torchinfo import summary
from torchmetrics.functional.classification import multiclass_accuracy

In [9]:
# 1. Load Data
file_dir = '../imgs'
not_pizza_dir = '../imgs/not_pizza/'
pizza_dir = '../imgs/pizza'

In [10]:
# 오류 없는 이미지 파일명 리스트 생성
not_pizza_list, pizza_list = [], []

for img in os.listdir(not_pizza_dir):
    try:
        img = os.path.join(not_pizza_dir, img)
        img = plt.imread(img)
        not_pizza_list.append(img)
    except:
        continue
for img in os.listdir(pizza_dir):
    try:
        img = os.path.join(pizza_dir, img)
        img = plt.imread(img)
        pizza_list.append(img)
    except:
        continue

In [34]:
# Shape 통일
transform = transforms.Compose([
    transforms.Resize((256, 256), interpolation=transforms.InterpolationMode.BILINEAR),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

# 전체 이미지 변환 : ImageFolder
total_dataset = ImageFolder(root=file_dir, transform=transform)

label_list = []
img_count = 0
for img, label in total_dataset:
    img_count += 1
    label_list.append(label)

# 2. Split Data
from torch.utils.data import random_split
ratios = [0.8, 0.1, 0.1]

train_dataset, valid_dataset, test_dataset = random_split(total_dataset, ratios, generator=torch.Generator().manual_seed(11))

# DataLoader : Batch Size = 32
train_loader = DataLoader(train_dataset, batch_size=32, drop_last=True)
valid_loader = DataLoader(valid_dataset, batch_size=32, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=32, drop_last=True)

In [68]:
class Modellasagna(nn.Module):
    def __init__(self):
        super(Modellasagna, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)  # (3, 256, 256) -> (32, 256, 256), 가로세로는 일정
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)  # (32, 256, 256) -> (64, 256, 256)
        self.pool = nn.MaxPool2d(kernel_size=2)  # (64, 256, 256) -> (64, 128, 128) -> (64, 64, 64)
        self.fc1 = nn.Linear(32 * 64 * 64, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)    # 0, 1이면 2개의 클래스, 1인가 2인가? 둘 다 해도 되지만 1로 하고 sigmoid 사용하는 게 나을 듯
        
        # 가중치 초기화
        nn.init.xavier_uniform_(self.conv1.weight)
        nn.init.xavier_uniform_(self.conv2.weight)
        nn.init.xavier_uniform_(self.fc1.weight)
        nn.init.xavier_uniform_(self.fc2.weight)
        nn.init.xavier_uniform_(self.fc3.weight)
        
    def forward(self, x):
        y = F.relu(self.conv1(x))   # (3, 256, 256) -> (32, 256, 256)
        y = self.pool(y)            # (32, 256, 256) -> (32, 128, 128)
        y = F.relu(self.conv2(y))   # (32, 128, 128) -> (64, 128, 128)
        y = self.pool(y)            # (64, 128, 128) -> (64, 64, 64)
        
        y = y.view(-1, 32 * 64 * 64)  # 1차원으로 펼치기
        y = F.relu(self.fc1(y))    # (64 * 150 * 150) -> (64)
        y = self.fc2(y)            # (64) -> (1)
        y = self.fc3(y)            # (1) -> (1)
        y = torch.sigmoid(y)       # 0, 1로 나타내기 위해 sigmoid 사용
        return y

In [84]:
# 학습 준비
model = Modellasagna()
print(summary(model, input_size=(32, 3, 256, 256), device='cpu'))

optimizer = optim.Adam(model.parameters(), lr=1e-8)
criterion = nn.BCEWithLogitsLoss()
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
Epochs = 20


Layer (type:depth-idx)                   Output Shape              Param #
Modellasagna                             [32, 1]                   --
├─Conv2d: 1-1                            [32, 16, 256, 256]        448
├─MaxPool2d: 1-2                         [32, 16, 128, 128]        --
├─Conv2d: 1-3                            [32, 32, 128, 128]        4,640
├─MaxPool2d: 1-4                         [32, 32, 64, 64]          --
├─Linear: 1-5                            [32, 64]                  8,388,672
├─Linear: 1-6                            [32, 32]                  2,080
├─Linear: 1-7                            [32, 1]                   33
Total params: 8,395,873
Trainable params: 8,395,873
Non-trainable params: 0
Total mult-adds (G): 3.64
Input size (MB): 25.17
Forward/backward pass size (MB): 402.68
Params size (MB): 33.58
Estimated Total Size (MB): 461.43


In [85]:
# make_dot
from torchviz import make_dot

# make_dot(model(torch.randn(32, 3, 256, 256)), params=dict(model.named_parameters()), show_attrs=True, show_saved=True)

In [86]:
# 학습
# 학습
loss_list, acc_list = [], []
for epoch in range(Epochs):
    model.train()
    for i, (imgs, labels) in enumerate(train_loader):
        # print('1',imgs)
        preds = model(imgs)
        label = labels.unsqueeze(1).float()
        loss = criterion(preds, label)
        # print('2',preds)
        loss_list.append(loss.item())

        optimizer.zero_grad()   # gradient 초기화
        loss.backward()     # backward
        optimizer.step()    # weight update

        if i % 10 == 0:
            print(f'Epoch [{epoch+1}/{Epochs}], Step [{i+1}/{len(train_loader)}], Loss : {loss.item()}')
        

    scheduler.step(loss.item())
    if scheduler.state_dict()['_last_lr'][0] < 1e-8:
        break

    model.eval()
    with torch.no_grad():
        for i, (imgs, labels) in enumerate(valid_loader):
            
            preds = model(imgs)
            label = labels.unsqueeze(1).float()
            print(preds)
            
            loss = criterion(preds, label)
            pred = preds > 0.5
            acc = multiclass_accuracy(pred, label, num_classes=3)
            # print(pred)
            acc_list.append(acc.item())
            
            print(f'Epoch [{epoch+1}/{Epochs}], Step [{i+1}/{len(valid_loader)}], Loss : {loss.item()}, Accuracy : {acc.item()}')

Epoch [1/20], Step [1/47], Loss : 0.6943125128746033
Epoch [1/20], Step [11/47], Loss : 0.7368735671043396
Epoch [1/20], Step [21/47], Loss : 0.70589280128479
Epoch [1/20], Step [31/47], Loss : 0.7544240355491638
Epoch [1/20], Step [41/47], Loss : 0.6319762468338013
tensor([[0.4759],
        [0.4900],
        [0.5230],
        [0.5195],
        [0.5122],
        [0.4990],
        [0.4739],
        [0.4730],
        [0.4948],
        [0.4563],
        [0.4616],
        [0.4821],
        [0.4842],
        [0.5352],
        [0.5143],
        [0.5242],
        [0.4532],
        [0.4133],
        [0.5418],
        [0.4819],
        [0.5163],
        [0.4746],
        [0.5352],
        [0.5014],
        [0.4960],
        [0.4520],
        [0.5185],
        [0.5024],
        [0.5350],
        [0.5290],
        [0.5321],
        [0.5087]])
tensor([[False],
        [False],
        [ True],
        [ True],
        [ True],
        [False],
        [False],
        [False],
        [False],
   

예측값이 너무 작은 문제  
시도1 : 학습률 줄임 0.001 -> 0.0001 : 반응 없음  
시도2 : 학습률 줄임 0.000001 -> 0.1~0.3 대로 상승  
시도3 : 0.0000001  
시도3-2 : 0.00000001 (1e-8) : 
시도4 : 모델 수정 : 가중치 초기화  
시도5 : 모델 수정 : 시그모이드 함수 위치 변경
