# Convolutional Neural Network 모델 정의

In [2]:
import torch
import torch.nn as nn

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cpu


In [None]:
# Create a Conv2d layer
## Input shape: (Batch, Channel, Height, Width), e.g. (100, 10, 100, 200)
layer = nn.Conv2d(
    in_channels=10,  # number of input channels
    out_channels=5, # number of filters (kernels) = number of output feature maps
    kernel_size=3,  # filter size (3, 3); a single int is enough when height and width are equal
    stride=1,       # step size of the sliding filter (default: 1)
    padding=1  # padding amount (alternative tried here: "same")
)

In [11]:
# Feed a batch of one all-ones input with 10 channels and 10x10 spatial size
# through the convolution layer and inspect the resulting shape.
input_data = torch.ones(1, 10, 10, 10)
output = layer(input_data)
output.shape

torch.Size([1, 5, 10, 10])

In [None]:
layer.weight.shape
# [5, 10, 3, 3]  # [number of filters: 5, input channels: 10, filter height: 3, filter width: 3]
# (out_channels, in_channels, kernel_size, kernel_size)

torch.Size([5, 10, 3, 3])

In [None]:
layer.bias.shape # one bias value per filter

torch.Size([5])

In [20]:
# Max-pooling layer used in the demonstrations below.
pool_layer = nn.MaxPool2d(
    kernel_size=2, # size of the pooling window (2, 2)
    stride=2,      # step size; set equal to the kernel size
    padding=1
)

In [17]:
# Compare the tensor shape before and after pooling.
# NOTE(review): the recorded output below (5x5) looks like it came from a run
# made before padding=1 was added to pool_layer (cell counter 17 < 20); with
# kernel 2 / stride 2 / padding 1 a 10x10 input should give 6x6 — confirm by
# re-running.
print(input_data.shape)
pool_output = pool_layer(input_data)
print(pool_output.shape)

torch.Size([1, 10, 10, 10])
torch.Size([1, 10, 5, 5])


In [27]:
# Repeat the pooling demo on a small random single-channel 5x5 input so the
# individual values can be inspected by eye.
input_data = torch.randn(1, 1, 5, 5)

print(input_data.shape)
pool_output = pool_layer(input_data)
print(pool_output.shape)

torch.Size([1, 1, 5, 5])
torch.Size([1, 1, 3, 3])


In [28]:
# Display the raw 5x5 input values.
input_data

tensor([[[[-0.6957, -0.6342,  0.4635,  1.9956, -1.1246],
          [-2.8484, -0.1004,  0.4432, -0.3017,  1.0525],
          [ 0.6118, -1.3783,  1.1444,  1.0403, -0.2005],
          [-1.3915,  0.3039,  0.4850,  0.6450,  0.3047],
          [-1.8958, -0.3284,  0.0737, -2.0000, -0.3655]]]])

In [29]:
# Display the pooled values; each entry is the maximum of one 2x2 window of
# the padded input.
pool_output

tensor([[[[-0.6957,  0.4635,  1.9956],
          [ 0.6118,  1.1444,  1.0525],
          [-1.3915,  0.4850,  0.6450]]]])

# MNIST

In [30]:
import os

import torch
from torch import nn
import torchinfo

import matplotlib.pyplot as plt
import numpy as np

from module.data import load_mnist_dataset, load_fashion_mnist_dataset
from module.train import fit

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cpu


In [31]:
# Hyperparameters
EPOCH = 1 # number of training epochs (alternative value noted by the author: 10)
BATCH_SIZE = 256
LEARNING_RATE = 0.001
DATA_ROOT_DIR = "datasets"

## Data 준비

In [32]:
# Build the MNIST loaders from the project helper.
# NOTE(review): the last positional argument presumably selects the train
# (True) vs. test (False) split — confirm against module.data.
train_loader = load_mnist_dataset(DATA_ROOT_DIR, BATCH_SIZE, True)
test_loader = load_mnist_dataset(DATA_ROOT_DIR, BATCH_SIZE, False)

## CNN 모델 정의

In [37]:
# CNN layer-block patterns:
#   Conv -> Activation -> Pooling
#   Conv -> Activation -> Conv -> Activation -> Pooling
#   Conv -> BatchNorm -> Activation -> Dropout -> Pooling
#
# Design: every block increases the number of filters (channel depth) while
# shrinking the feature-map size.
#   depth is controlled by Conv2d, size by MaxPool2d.

class CNNModel(nn.Module):
    """CNN classifier: three Conv-BatchNorm-ReLU-MaxPool blocks plus a linear head.

    For a (N, 1, 28, 28) input the channels grow 1 -> 32 -> 64 -> 128 while
    the spatial size shrinks 28 -> 14 -> 7 -> 4. The result is flattened to
    4*4*128 features and mapped to 10 raw class scores (no softmax is
    applied inside the model).
    """

    def __init__(self, dropout_rate=0.2):
        """Create the three feature blocks and the classifier.

        Args:
            dropout_rate: Kept in the signature, but no Dropout layer is
                built at the moment, so the value is not used.
        """
        super().__init__()

        self.b1 = self._conv_block(1, 32)
        self.b2 = self._conv_block(32, 64)
        # The last pooling pads by 1 so that the 7x7 maps become 4x4.
        self.b3 = self._conv_block(64, 128, pool_padding=1)

        # Classifier head: fully connected layer (nn.Linear).
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=4 * 4 * 128, out_features=10),
        )

    @staticmethod
    def _conv_block(in_channels, out_channels, pool_padding=0):
        """Return one Conv(3x3, same) -> BatchNorm -> ReLU -> MaxPool(2, 2) block."""
        return nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=3,
                stride=1,
                padding="same",
            ),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=pool_padding),
        )

    def forward(self, X):
        """Run ``X`` through the three blocks in order, then the classifier."""
        features = X
        for block in (self.b1, self.b2, self.b3):
            features = block(features)
        return self.classifier(features)

In [38]:
# Print a layer-by-layer summary (output shapes, parameter counts) of the
# model for an input of shape (1, 1, 28, 28).
torchinfo.summary(CNNModel(dropout_rate=0.5), (1, 1, 28, 28))

Layer (type:depth-idx)                   Output Shape              Param #
CNNModel                                 [1, 10]                   --
├─Sequential: 1-1                        [1, 32, 14, 14]           --
│    └─Conv2d: 2-1                       [1, 32, 28, 28]           320
│    └─BatchNorm2d: 2-2                  [1, 32, 28, 28]           64
│    └─ReLU: 2-3                         [1, 32, 28, 28]           --
│    └─MaxPool2d: 2-4                    [1, 32, 14, 14]           --
├─Sequential: 1-2                        [1, 64, 7, 7]             --
│    └─Conv2d: 2-5                       [1, 64, 14, 14]           18,496
│    └─BatchNorm2d: 2-6                  [1, 64, 14, 14]           128
│    └─ReLU: 2-7                         [1, 64, 14, 14]           --
│    └─MaxPool2d: 2-8                    [1, 64, 7, 7]             --
├─Sequential: 1-3                        [1, 128, 4, 4]            --
│    └─Conv2d: 2-9                       [1, 128, 7, 7]            73,856
│    

## Train

In [39]:
# Model
model = CNNModel().to(device)

# Loss function
loss_fn = nn.CrossEntropyLoss() 

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [40]:
# Make sure the output directory exists before training writes into it.
os.makedirs("saved_models", exist_ok=True)

# Train with the project helper; save_model_path tells it where to store the
# model (see module.train.fit for the exact saving policy).
save_path = "saved_models/mnist_cnn_model.pt"
result = fit(
    train_loader, test_loader, model, loss_fn, optimizer, EPOCH,
    save_model_path=save_path,
    device=device, 
    mode="multi"
)

Epoch[1/1] - Train loss: 0.04362 Train Accucracy: 0.98653 || Validation Loss: 0.04200 Validation Accuracy: 0.98690
저장: 1 - 이전 : inf, 현재: 0.04200267249252647
39.9455361366272 초


In [None]:
# Reload the model stored during training.
# - map_location=device lets a checkpoint written on a GPU machine load on a
#   CPU-only machine (and vice versa).
# - weights_only=False is required because the loaded object is used directly
#   as a model below, i.e. the file presumably holds a whole pickled module;
#   PyTorch >= 2.6 defaults to weights_only=True and would reject it.
#   Unpickling can execute arbitrary code, so only load files you trust.
model = torch.load(save_path, map_location=device, weights_only=False)

In [None]:
# Final evaluation on the test loader
from module.train import test_multi_classification
loss, acc = test_multi_classification(
    test_loader, model, loss_fn, device
)
loss, acc

In [None]:
## Qualitative check - try the model on an actual image file
from PIL import Image
img = Image.open("test_img/num/eight.png")
type(img)

In [None]:
from torchvision import transforms
from PIL import Image

@torch.no_grad()
def predict(path, model):
    """Classify a single image file with a trained model.

    The image is converted to grayscale, resized to 28x28, turned into a
    float tensor scaled to the 0-1 range, given a leading batch axis and
    passed through ``model`` in eval mode on the global ``device``.
    Gradient tracking is disabled for the whole call.

    Args:
        path: Path of the image file to classify.
        model: Trained network. It is switched to eval mode and moved to
            ``device`` as a side effect.

    Returns:
        dict with two entries taken from the first item of the batch:
        ``"class"`` (index of the highest-probability class) and ``"확률"``
        (that class's softmax probability). Both are tensors, not Python
        numbers.
    """
    # Open through a context manager so the file handle is released right
    # away; convert() returns a separate grayscale image that stays usable.
    with Image.open(path) as img:
        # 'L': grayscale, "RGB": color
        gray_img = img.convert('L')

    # Resize to the 28x28 input size the model was trained on.
    input_tensor = transforms.Resize((28, 28))(gray_img)

    # PIL.Image -> torch.Tensor, scaled to the 0-1 range.
    input_tensor = transforms.ToTensor()(input_tensor)

    # Add the batch (dummy) axis in front.
    input_tensor = input_tensor.unsqueeze(dim=0)

    # Move the input to the target device.
    input_tensor = input_tensor.to(device)

    model.eval()
    model = model.to(device)

    result = model(input_tensor)
    # Turn the raw scores into probabilities over the last axis.
    sm = nn.Softmax(dim=-1)
    result_proba = sm(result)
    final_result = result_proba.max(dim=-1)
    return {"class": final_result.indices[0],
            "확률": final_result.values[0]}

In [None]:
from glob import glob

# glob returns matches in arbitrary order; sort them so the printed results
# appear in the same order on every run.
img_path_list = sorted(glob("test_img/num/*.png"))
for img_path in img_path_list:
    result = predict(img_path, model)
    print(f"{img_path}, 추론class: {result['class']}, 확률: {result['확률']}")