In [1]:
import torch
from torch import nn
from pathlib import Path
from tqdm.auto import tqdm
from torchmetrics.classification import Accuracy

In [2]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [3]:
%%writefile model/data_setup.py
import torch
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

from pathlib import Path
"""
Data setup utilities: contains the function used to create the training and validation dataloaders.
"""

def create_dataloader(
    dataset_path: Path,
    batch_size: int,
    train_transform: transforms.Compose,
    val_transform: transforms.Compose,
):
    """Build the training and validation dataloaders for an image-folder dataset.

    The ``train`` and ``val`` sub-directories of ``dataset_path`` are each
    loaded with ``ImageFolder``.

    Args:
        dataset_path: Root directory that holds the ``train`` and ``val`` folders.
        batch_size: Batch size used by both dataloaders.
        train_transform: Transform applied to the training images.
        val_transform: Transform applied to the validation images.

    Returns:
        A ``(train_dataloader, val_dataloader, class_names)`` tuple, where
        ``class_names`` is the ``classes`` attribute of the training dataset.
    """
    train_data = ImageFolder(root=dataset_path / 'train', transform=train_transform)
    val_data = ImageFolder(root=dataset_path / 'val', transform=val_transform)

    # Only the training loader shuffles its samples.
    train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_dataloader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

    return train_dataloader, val_dataloader, train_data.classes

Overwriting model/data_setup.py


In [4]:
%%writefile model/model_builder.py
import torch
from torch import nn
from torchvision.models import efficientnet_v2_s, EfficientNet_V2_S_Weights
from torchvision.models import resnet50, ResNet50_Weights

def efficientnet_v2_s_model(class_names: list, device: str):
    """Create an EfficientNetV2-S model with a classifier sized for ``class_names``.

    The network is built with ``EfficientNet_V2_S_Weights.DEFAULT`` and its
    ``classifier`` is replaced by a dropout layer followed by a linear layer
    with one output per class.

    Args:
        class_names: Class labels; only the length is used.
        device: Device the model is moved to before it is returned.

    Returns:
        The model, moved to ``device``.
    """
    net = efficientnet_v2_s(weights=EfficientNet_V2_S_Weights.DEFAULT)

    # New head: same dropout/linear layout, output width = number of classes.
    head = nn.Sequential(
        nn.Dropout(p=0.2, inplace=True),
        nn.Linear(in_features=1280, out_features=len(class_names), bias=True),
    )
    net.classifier = head

    return net.to(device)

def resnet50_model(class_names: list, device: str):
    """Create a ResNet-50 model with a final layer sized for ``class_names``.

    The network is built with ``ResNet50_Weights.DEFAULT`` and its ``fc``
    attribute is replaced by a ``nn.Sequential`` holding a single linear layer
    with one output per class.

    Args:
        class_names: Class labels; only the length is used.
        device: Device the model is moved to before it is returned.

    Returns:
        The model, moved to ``device``.
    """
    weights = ResNet50_Weights.DEFAULT
    model = resnet50(weights=weights)

    # Keep the nn.Sequential wrapper so saved state_dict keys stay unchanged.
    model.fc = nn.Sequential(
        nn.Linear(in_features=2048, out_features=len(class_names), bias=True)
    )

    # Move to the device once, after the new head is attached, so the whole
    # model (including the freshly created layer) is transferred in one pass.
    model = model.to(device)
    return model

Overwriting model/model_builder.py


In [5]:
# from torchvision.models import resnet50, ResNet50_Weights
# from torchinfo import summary

# summary(model=resnet50(), 
#         input_size=(32, 3, 224, 224), # make sure this is "input_size", not "input_shape"
#         # col_names=["input_size"], # uncomment for smaller output
#         col_names=["input_size", "output_size", "num_params", "trainable"],
#         col_width=20,
#         row_settings=["var_names"]
# )


In [6]:
# resnet50().fc

In [7]:
%%writefile model/utils.py
import os
from pathlib import Path
import matplotlib.pyplot as plt
import torch

def plot_loss_curves(results: dict[str, list[float]],
                     save_path: "str | os.PathLike | None" = 'haha.png'):
    """Plot training/validation loss and accuracy curves side by side.

    Args:
        results: Dictionary with the keys ``'train_loss'``, ``'val_loss'``,
            ``'train_acc'`` and ``'val_acc'``, each holding one value per epoch.
        save_path: File the figure is written to. Defaults to ``'haha.png'``,
            the location that used to be hard-coded, so existing callers
            behave as before. Pass ``None`` to skip writing this file.

    Returns:
        The ``matplotlib.pyplot`` module, with the newly created figure as the
        current figure, so callers can call ``.savefig(...)`` on the result.
    """
    # Loss and accuracy histories (training and val).
    train_loss = results['train_loss']
    val_loss = results['val_loss']
    train_accuracy = results['train_acc']
    val_accuracy = results['val_acc']

    # One x position per recorded epoch, starting at 0.
    epochs = range(len(train_loss))

    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(15, 7))

    # Left panel: loss.
    loss_ax.plot(epochs, train_loss, label='train_loss')
    loss_ax.plot(epochs, val_loss, label='val_loss')
    loss_ax.set_title('Loss')
    loss_ax.set_xlabel('Epochs')
    loss_ax.legend()

    # Right panel: accuracy.
    acc_ax.plot(epochs, train_accuracy, label='train_accuracy')
    acc_ax.plot(epochs, val_accuracy, label='val_accuracy')
    acc_ax.set_title('Accuracy')
    acc_ax.set_xlabel('Epochs')
    acc_ax.legend()

    if save_path is not None:
        fig.savefig(save_path)

    # The pyplot module is returned (not the figure) to keep the existing
    # contract that callers rely on.
    return plt

def save_model(model: torch.nn.Module,
               results: dict[str, list[float]]):
    """Save the model weights and the loss/accuracy plot to a new run folder.

    The run folder is ``runs/classify/train<i>``, where ``<i>`` is the smallest
    non-negative integer whose entry does not already exist in
    ``runs/classify/``. The model's ``state_dict`` is written to ``model.pth``
    and the plot to ``loss.jpg`` inside that folder.

    Args:
        model: Model whose ``state_dict`` is saved.
        results: Training history passed to ``plot_loss_curves``.
    """
    figure_handle = plot_loss_curves(results)

    runs_root = Path('runs/classify/')
    runs_root.mkdir(parents=True, exist_ok=True)

    # Snapshot the existing entries once, then take the first free "train<i>".
    taken = os.listdir(runs_root)
    run_index = 0
    while f'train{run_index}' in taken:
        run_index += 1

    run_dir = runs_root / f'train{run_index}'
    run_dir.mkdir(parents=True, exist_ok=True)

    print(f"[INFO] Saving model to: {run_dir}")
    figure_handle.savefig(run_dir / 'loss.jpg')
    torch.save(obj=model.state_dict(), f=run_dir / 'model.pth')

Overwriting model/utils.py


In [8]:
%%writefile model/engine.py
import torch
from torchmetrics import Accuracy
from tqdm.auto import tqdm

def __train(model: torch.nn.Module,
            dataloader: torch.utils.data.DataLoader,
            loss_func: torch.nn.Module,
            optimizer: torch.optim.Optimizer,
            mectric_func: Accuracy,
            device: str):
    """Run one training epoch over ``dataloader``.

    Args:
        model: Model to optimise; it is put into training mode.
        dataloader: Yields ``(X, y)`` batches.
        loss_func: Called as ``loss_func(y_pred, y)``.
        optimizer: Optimizer stepped once per batch.
        mectric_func: Metric called as ``mectric_func(y_pred, y)`` on the raw
            model outputs.
        device: Device each batch is moved to.

    Returns:
        ``(train_loss, train_acc)``: the per-batch loss and metric values
        averaged over the number of batches.

    Raises:
        ZeroDivisionError: If ``dataloader`` has zero batches.
    """
    train_loss = 0
    train_acc = 0

    model.train()

    for X, y in tqdm(dataloader, desc= '-----Train'):
        X, y = X.to(device), y.to(device)

        y_pred = model(X)
        loss = loss_func(y_pred, y)
        train_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The metric is fed the raw model outputs, exactly as before; the
        # softmax/argmax result that used to be computed here was never used.
        train_acc += mectric_func(y_pred, y).item()

    # Mean over batches (not weighted by batch size).
    train_loss /= len(dataloader)
    train_acc /= len(dataloader)

    return train_loss, train_acc

def __val(model: torch.nn.Module,
           dataloader: torch.utils.data.DataLoader,
           loss_func: torch.nn.Module,
           mectric_func: Accuracy,
           device: str):
    """Evaluate ``model`` on ``dataloader`` without updating its weights.

    Args:
        model: Model to evaluate; it is put into eval mode.
        dataloader: Yields ``(X, y)`` batches.
        loss_func: Called as ``loss_func(y_pred, y)``.
        mectric_func: Metric called as ``mectric_func(y_pred, y)`` on the raw
            model outputs.
        device: Device each batch is moved to.

    Returns:
        ``(val_loss, val_acc)``: the per-batch loss and metric values averaged
        over the number of batches.

    Raises:
        ZeroDivisionError: If ``dataloader`` has zero batches.
    """
    val_loss = 0
    val_acc = 0

    model.eval()

    with torch.inference_mode():
        for X, y in tqdm(dataloader, desc= '-------Val'):
            X, y = X.to(device), y.to(device)

            y_pred = model(X)
            val_loss += loss_func(y_pred, y).item()

            # The metric is fed the raw model outputs, exactly as before; the
            # softmax/argmax result that used to be computed here was never used.
            val_acc += mectric_func(y_pred, y).item()

    # Mean over batches (not weighted by batch size).
    val_loss /= len(dataloader)
    val_acc /= len(dataloader)

    return val_loss, val_acc

def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          val_dataloader: torch.utils.data.DataLoader,
          loss_func: torch.nn.Module,
          optimizer: torch.optim.Optimizer,
          mectric_func: Accuracy,
          epochs: int,
          device: str):
    """Train and validate ``model`` for ``epochs`` epochs.

    Each epoch runs one training pass followed by one validation pass, prints
    a summary line and records the four values.

    Args:
        model: Model to train.
        train_dataloader: Batches used for the training pass.
        val_dataloader: Batches used for the validation pass.
        loss_func: Loss passed to both passes.
        optimizer: Optimizer passed to the training pass.
        mectric_func: Metric passed to both passes.
        epochs: Number of epochs to run.
        device: Device passed to both passes.

    Returns:
        Dictionary with the keys ``'train_loss'``, ``'train_acc'``,
        ``'val_loss'`` and ``'val_acc'``, each a list with one entry per epoch.
    """
    history = {key: [] for key in ('train_loss', 'train_acc', 'val_loss', 'val_acc')}

    # Fixed seeds for the CPU and CUDA generators.
    torch.manual_seed(42)
    torch.cuda.manual_seed(42)

    for epoch in tqdm(range(epochs), desc= 'Epoch'):
        print(f"\n\nEpoch: {epoch+1:2} ------------")

        train_loss, train_acc = __train(
            model=model, dataloader=train_dataloader, loss_func=loss_func,
            optimizer=optimizer, mectric_func=mectric_func, device=device,
        )
        val_loss, val_acc = __val(
            model=model, dataloader=val_dataloader, loss_func=loss_func,
            mectric_func=mectric_func, device=device,
        )

        print(f"Epoch: {epoch+1:2} | Train Loss: {train_loss:.5f} | Train Acc: {train_acc:.4f} | Val Loss: {val_loss:.5f} | Val Acc: {val_acc:.4f}")

        # The dict preserves insertion order, so the values line up with the keys.
        epoch_values = (train_loss, train_acc, val_loss, val_acc)
        for key, value in zip(history, epoch_values):
            history[key].append(value)

    return history

Overwriting model/engine.py


In [9]:
%%writefile model/train.py
import torch
from torch import nn
from torchvision import transforms

from pathlib import Path
from torchmetrics import Accuracy

from model.data_setup import create_dataloader
from model.engine import train
from model.utils import save_model
from model.model_builder import resnet50_model

def run(dataset_path: str= 'path_to_dataset', epoch:int= 25, learning_rate: float= 0.001, batch_size: int= 32):
    """Train a ResNet-50 classifier on an image-folder dataset and save the result.

    Builds the dataloaders, the model, a cross-entropy loss, an SGD optimizer
    and a multiclass accuracy metric, runs the training loop, then hands the
    model and its history to ``save_model``.

    Args:
        dataset_path: Dataset root; passed to ``create_dataloader`` as a ``Path``.
        epoch: Number of epochs to train for.
        learning_rate: Learning rate given to the SGD optimizer.
        batch_size: Batch size used for both dataloaders.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    dataset_path = Path(dataset_path)

    train_transforms_data = transforms.Compose([
        transforms.RandomResizedCrop(size=(224, 224), antialias=True),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.ToTensor()
    ])

    # Resize to an explicit (height, width): an int size only rescales the
    # shorter edge, which leaves non-square images with differing shapes that
    # cannot be stacked into one batch and do not match the 224x224 train crop.
    val_transforms_data = transforms.Compose([
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor()
    ])
    # NOTE(review): no normalisation is applied although the model starts from
    # pretrained weights; confirm whether that is intentional.

    train_dataloader, val_dataloader, class_names = create_dataloader(dataset_path=dataset_path,
                                                              batch_size=batch_size,
                                                              train_transform=train_transforms_data,
                                                              val_transform=val_transforms_data)

    model = resnet50_model(class_names= class_names, device= device)

    loss_func = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(params= model.parameters(), lr= learning_rate)

    # The metric lives on the same device as the batches it is called with.
    mectric_func = Accuracy(task='multiclass', num_classes= len(class_names)).to(device)

    results = train(
            model= model,
            train_dataloader= train_dataloader,
            val_dataloader= val_dataloader,
            loss_func= loss_func,
            optimizer= optimizer,
            mectric_func= mectric_func,
            epochs= epoch,
            device= device
    )

    save_model(model= model, results= results)

Overwriting model/train.py


In [None]:
# Import the training module that was written to model/train.py by the cell above.
from model import train

# Run training on ./datasets/tomato for 2 epochs with a batch size of 32.
train.run(dataset_path='./datasets/tomato', epoch= 2, batch_size= 32)

Epoch:   0%|          | 0/2 [00:00<?, ?it/s]



Epoch:  1 ------------


-----Train:   0%|          | 0/313 [00:00<?, ?it/s]

-------Val:   0%|          | 0/32 [00:00<?, ?it/s]

Epoch:  1 | Train Loss: 2.24206 | Train Acc: 0.2149 | Val Loss: 2.19557 | Val Acc: 0.3389


Epoch:  2 ------------


-----Train:   0%|          | 0/313 [00:00<?, ?it/s]