In [89]:
%%writefile import_lib.py
'''
File tải thư viện cho Lightning và các Module cho bài phân loại multiclass
'''
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset, random_split
from torchvision.tv_tensors import Image as TVImage
from torchvision.transforms import v2 as TV2
from torchmetrics import F1Score,ConfusionMatrix
from mlxtend.plotting import plot_confusion_matrix

import pytorch_lightning as PL
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.tuner import Tuner

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from pathlib import Path
from PIL import Image as PILImage


Overwriting import_lib.py


In [None]:
%%writefile dataloader/datamodule.py
'''
Class tải dữ liệu:

Tham số: 
batch_size (int)
data_dir: đường dẫn đến data (Path)

Trả về:
prepare_data(): tải dữ liệu được ghi trong data_dir
setup(): chuẩn bị dữ liệu dựa trên hàm gọi 
data_loader(): trả về data_loader với data là train/test/val 
'''
from pathlib import Path

import pytorch_lightning as PL
import torch
from torch.utils.data import DataLoader, random_split
from torch.utils.data import Subset
from torchvision.datasets import ImageFolder
from torchvision.transforms import v2 as TV2
class DataModule(PL.LightningDataModule):
    """LightningDataModule serving an ``ImageFolder``-style classification dataset.

    ``data_dir`` must contain a ``train`` and a ``test`` sub-directory readable by
    ``torchvision.datasets.ImageFolder``. The ``train`` folder is split 80/20 into
    training and validation subsets.

    Args:
        batch_size (int): Batch size used by every dataloader.
        data_dir (Path | str): Root directory holding ``train/`` and ``test/``.
        seed (int): Seed of the generator that drives the train/validation split,
            so the split is identical on every run and in every process.
        num_workers (int): Worker processes used by every dataloader.
    """

    def __init__(self, batch_size=32, data_dir=Path("pizza_steak_sushi/"), seed=42, num_workers=2):
        super().__init__()
        self.batch_size = batch_size
        # Accept plain strings as well: setup() relies on the `/` path operator.
        self.data_dir = Path(data_dir)
        self.seed = seed
        self.num_workers = num_workers
        self.train_transform = self._build_transform()
        self.test_transform = self._build_transform()

    @staticmethod
    def _build_transform():
        """Resize to 64x64 and convert to a float32 tensor scaled to [0, 1]."""
        # ToImage + ToDtype(scale=True) is the transforms-v2 replacement for the
        # deprecated ToTensor(); ToPureTensor() keeps the output a plain torch.Tensor.
        return TV2.Compose([
            TV2.Resize((64, 64)),
            TV2.ToImage(),
            TV2.ToDtype(torch.float32, scale=True),
            TV2.ToPureTensor(),
        ])

    def prepare_data(self):
        """Nothing to download: the data is expected to already be in ``data_dir``."""
        pass

    def setup(self, stage=None):
        """Build the datasets required for ``stage``.

        ``'fit'``/``'validate'`` create ``train_ds`` and ``val_ds``; ``'test'``
        creates ``test_ds``; ``None`` creates all three.
        """
        if stage in ('fit', 'validate', None):
            train_root = self.data_dir / 'train'
            full_train_ds = ImageFolder(root=train_root, transform=self.train_transform)
            train_len = int(0.8 * len(full_train_ds))
            val_len = len(full_train_ds) - train_len
            # A seeded generator keeps the split stable across runs/processes so
            # validation samples never leak into training.
            split_generator = torch.Generator().manual_seed(self.seed)
            train_subset, val_subset = random_split(
                full_train_ds, [train_len, val_len], generator=split_generator
            )
            self.train_ds = train_subset
            # Re-wrap the validation indices around a dataset that uses the
            # evaluation transform, so future train-time augmentation is not
            # applied to validation images.
            self.val_ds = Subset(
                ImageFolder(root=train_root, transform=self.test_transform),
                indices=val_subset.indices,
            )

        if stage in ('test', None):
            self.test_ds = ImageFolder(root=self.data_dir / 'test', transform=self.test_transform)

    def train_dataloader(self):
        """Shuffled dataloader over the training subset."""
        return DataLoader(dataset=self.train_ds, batch_size=self.batch_size,
                          shuffle=True, num_workers=self.num_workers)

    def test_dataloader(self):
        """Dataloader over the test set (no shuffling)."""
        return DataLoader(dataset=self.test_ds, batch_size=self.batch_size,
                          num_workers=self.num_workers)

    def val_dataloader(self):
        """Dataloader over the validation subset (no shuffling)."""
        return DataLoader(dataset=self.val_ds, batch_size=self.batch_size,
                          num_workers=self.num_workers)


Writing dataloader/datamodule.py


In [92]:
%%writefile backbones/model_class.py
'''
Class khai báo model TinyVGG:

Tham số:
input: channel ảnh (int)
output_size: số lượng label (int)
kernel: số filter (int)
lr (float)

Cấu trúc: 
1 Conv2d (input, kernel, 3-same)
3 Conv2d (kernel, kernel, 3-same)
2 MaxPool2d (2)
Flatten()
Linear(kernel*(H/4)*(W/4), output_size)

Trả về:
forward của cấu trúc
training_step() với CEL, f1, adam(lr)
test_step(), validation_step() với CEL, f1
'''

class TinyVGG(PL.LightningModule):
    """TinyVGG image classifier: two conv blocks followed by a linear head.

    Each block is Conv-ReLU-Conv-ReLU-MaxPool(2) with 3x3 'same' convolutions,
    so every block halves the spatial resolution.

    Args:
        input (int): Number of channels of the input images.
        output_size (int): Number of classes.
        kernel (int): Number of filters in every convolution.
        lr (float): Learning rate passed to Adam.
        image_size (int | tuple[int, int]): Input height/width (a single int means
            a square image). Used to size the linear head; defaults to 64, which
            reproduces the previously hard-coded ``kernel * 16 * 16`` features.
    """

    def __init__(self, input, output_size, kernel, lr, image_size=64):
        super().__init__()
        self.save_hyperparameters()
        self.train_f1 = F1Score(task='multiclass', num_classes=output_size)
        self.val_f1 = F1Score(task='multiclass', num_classes=output_size)
        self.test_f1 = F1Score(task='multiclass', num_classes=output_size)

        if isinstance(image_size, int):
            height = width = image_size
        else:
            height, width = image_size
        # Two MaxPool2d(2) layers floor-divide each spatial dimension by 4.
        flat_features = kernel * (height // 4) * (width // 4)

        self.block_1 = nn.Sequential(
            nn.Conv2d(in_channels=input,
                      out_channels=kernel,
                      kernel_size=3,
                      padding='same',
                      stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=kernel,
                      out_channels=kernel,
                      kernel_size=3,
                      padding='same',
                      stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)  # (N, kernel, H/2, W/2)
        )
        self.block_2 = nn.Sequential(
            nn.Conv2d(kernel,
                      kernel,
                      kernel_size=3,
                      padding='same'),
            nn.ReLU(),
            nn.Conv2d(kernel,
                      kernel,
                      kernel_size=3,
                      padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(2)  # (N, kernel, H/4, W/4)
        )
        self.classification = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flat_features, output_size)
        )

    def forward(self, X):
        """Return the class logits for a batch of images."""
        X = self.block_1(X)
        X = self.block_2(X)
        X = self.classification(X)
        return X

    def _shared_step(self, batch):
        """Run the forward pass; return (cross-entropy loss, predicted classes, targets)."""
        X, y = batch
        logits = self(X)  # dispatches to forward()
        loss = nn.functional.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        return loss, preds, y

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss and training metric; return the loss."""
        loss, preds, y = self._shared_step(batch)
        self.train_f1.update(preds, y)
        self.log('train_loss', loss, prog_bar=True)
        # The metric is only update()d, so it has no per-step value to report.
        # Log it at epoch level so it is computed, shown in the progress bar and
        # reset each epoch instead of being dropped and accumulating forever.
        # The key says 'acc' but the logged object is the F1Score metric.
        # NOTE(review): with the wrapper's default averaging this is presumably
        # micro-F1, which equals accuracy for single-label data; confirm.
        self.log('train_acc', self.train_f1, on_step=False, on_epoch=True, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss and F1 metric."""
        loss, preds, y = self._shared_step(batch)
        self.val_f1.update(preds, y)
        self.log("val_loss", loss, prog_bar=True)
        self.log("val_f1", self.val_f1, prog_bar=True)

    def test_step(self, batch, batch_idx):
        """Compute and log the test loss and test metric (F1Score logged as 'test_acc')."""
        loss, preds, y = self._shared_step(batch)
        self.test_f1.update(preds, y)
        self.log('test_loss', loss, prog_bar=True)
        self.log('test_acc', self.test_f1, prog_bar=True)

    def predict_step(self, batch, batch_idx):
        """Return the logits for the images of an ``(images, labels)`` batch."""
        X, _ = batch
        return self(X)

    def configure_optimizers(self):
        """Adam with a StepLR schedule that multiplies the lr by 0.9 every 3 steps of the scheduler."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.9)
        return {"optimizer": optimizer, "lr_scheduler": scheduler}
        
    

Writing backbones/model_class.py


In [93]:
%%writefile logger.py 
'''
Hàm khai báo các phương thức Logger, CheckPoint, EarlyStopping và Trainer

Tham số:
không

Trả về: 
logger: TensorBoardLogger
checkpoint: theo dõi val_loss, lưu toàn bộ giá trị model tốt nhất
earlystop: theo dõi val_f1, dừng nếu 3 epochs không tăng 0.005
trainer: chạy trên auto accelerator, tối đa 10 epoch
'''
def log_check_early(save_dir='tb_log', max_epochs=10, patience=3, min_delta=0.005,
                    log_every_n_steps=50):
    """Build the logger, callbacks and Trainer used for training.

    Args:
        save_dir (str): Directory the TensorBoard logger writes to.
        max_epochs (int): Upper bound on the number of training epochs.
        patience (int): Number of validation checks without improvement of
            ``val_f1`` tolerated before early stopping.
        min_delta (float): Minimum increase of ``val_f1`` counted as an improvement.
        log_every_n_steps (int): Step interval at which the Trainer logs. Pass a
            value no larger than the number of training batches per epoch,
            otherwise step-level training logs are never written.

    Returns:
        tuple: ``(logger, checkpoint, earlystop, trainer)`` where ``checkpoint``
        keeps the single best full checkpoint by lowest ``val_loss`` and
        ``earlystop`` watches ``val_f1`` (higher is better).
    """
    logger = TensorBoardLogger(save_dir=save_dir)
    checkpoint = ModelCheckpoint(monitor='val_loss', mode='min', save_top_k=1,
                                 save_weights_only=False)
    earlystop = EarlyStopping(monitor='val_f1', mode='max', min_delta=min_delta,
                              patience=patience)
    trainer = PL.Trainer(accelerator='auto',
                         logger=logger,
                         callbacks=[checkpoint, earlystop],
                         max_epochs=max_epochs,
                         log_every_n_steps=log_every_n_steps)
    return logger, checkpoint, earlystop, trainer
# tuner = Tuner(trainer)
# lr_finder = tuner.lr_find(model, datamodule=datamodule, min_lr = 0.001, max_lr = 0.01)
# model.hparams.lr = lr_finder.suggestion()

Writing logger.py


In [None]:
# The class is `DataModule`; calling `datamodule()` fails on a fresh kernel
# because that name is only bound by this very assignment.
datamodule = DataModule()
# TinyVGG has four required arguments. 3 input channels, 3 classes and 10 filters
# match the torchinfo summary shown below; lr=1e-3 is an assumed starting value
# (TODO confirm against the run that produced the logged results).
model = TinyVGG(input=3, output_size=3, kernel=10, lr=1e-3)

In [83]:
# Build every dataset (stage=None), then grab one loader per split.
datamodule.setup()
train_loader, test_loader, val_loader = (
    datamodule.train_dataloader(),
    datamodule.test_dataloader(),
    datamodule.val_dataloader(),
)

In [84]:
# Number of batches per split, in (train, test, val) order.
tuple(len(loader) for loader in (train_loader, test_loader, val_loader))

(6, 3, 2)

In [86]:
from torchinfo import summary

# Layer-by-layer summary of the model for an input of size [1, 3, 64, 64].
summary(model, input_size=[1, 3, 64, 64])

Layer (type:depth-idx)                   Output Shape              Param #
TinyVGG                                  [1, 3]                    --
├─Sequential: 1-1                        [1, 10, 32, 32]           --
│    └─Conv2d: 2-1                       [1, 10, 64, 64]           280
│    └─ReLU: 2-2                         [1, 10, 64, 64]           --
│    └─Conv2d: 2-3                       [1, 10, 64, 64]           910
│    └─ReLU: 2-4                         [1, 10, 64, 64]           --
│    └─MaxPool2d: 2-5                    [1, 10, 32, 32]           --
├─Sequential: 1-2                        [1, 10, 16, 16]           --
│    └─Conv2d: 2-6                       [1, 10, 32, 32]           910
│    └─ReLU: 2-7                         [1, 10, 32, 32]           --
│    └─Conv2d: 2-8                       [1, 10, 32, 32]           910
│    └─ReLU: 2-9                         [1, 10, 32, 32]           --
│    └─MaxPool2d: 2-10                   [1, 10, 16, 16]           --
├─Sequentia

In [85]:
# Train the model; the datamodule supplies the train and validation loaders.
trainer.fit(model=model, datamodule=datamodule)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type              | Params | Mode 
-------------------------------------------------------------
0 | train_f1       | MulticlassF1Score | 0      | train
1 | val_f1         | MulticlassF1Score | 0      | train
2 | test_f1        | MulticlassF1Score | 0      | train
3 | block_1        | Sequential        | 1.2 K  | train
4 | block_2        | Sequential        | 1.8 K  | train
5 | classification | Sequential        | 7.7 K  | train
-------------------------------------------------------------
10.7 K    Trainable params
0         Non-trainable params
10.7 K    Total params
0.043     Total estimated model params size (MB)
18        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

d:\Python\envs\DL_ENV\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.


                                                                            

d:\Python\envs\DL_ENV\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
d:\Python\envs\DL_ENV\lib\site-packages\pytorch_lightning\loops\fit_loop.py:310: The number of training batches (6) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 7: 100%|██████████| 6/6 [00:05<00:00,  1.05it/s, v_num=0, train_loss=0.683, val_loss=0.886, val_f1=0.667]


In [78]:
# Pull one training batch and turn its first image into an array with the
# first axis moved last.
X, y = next(iter(train_loader))
img = X[0].permute(1, 2, 0).numpy()