In [1]:
import torch
from torch import nn
from pathlib import Path
from tqdm.auto import tqdm
from torchmetrics.classification import Accuracy

In [2]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [None]:
# Root of the dataset; the training and validation splits live in sub-folders of it.
dataset_path = Path('./datasets/tomato/')
train_path = dataset_path.joinpath('train')
val_path = dataset_path.joinpath('val')

(train_path, val_path)

In [6]:
%%writefile model/data_setup.py
import torch
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

from pathlib import Path
"""
Contains functionality for setting up data: a helper that creates the training and validation DataLoaders.
"""

def create_dataloader(
                    dataset_path: Path,
                    batch_size: int,
                    train_transform: transforms.Compose,
                    val_transform: transforms.Compose,
                    num_workers: int = 0,
                ):
    """Creates training and validation DataLoaders.

    Builds one ``ImageFolder`` dataset from ``dataset_path / 'train'`` and one
    from ``dataset_path / 'val'``, then wraps each in a ``DataLoader``.

    Args:
        dataset_path: Path to the dataset directory containing the ``train``
            and ``val`` sub-directories.
        batch_size: Number of samples per batch in each of the DataLoaders.
        train_transform: torchvision transforms to perform on training data.
        val_transform: torchvision transforms to perform on validation data.
        num_workers: Number of worker processes used by each DataLoader.
            Defaults to 0 (load in the main process), which is also the
            ``DataLoader`` default, so existing callers are unaffected.

    Returns:
        A tuple of (train_dataloader, val_dataloader, class_names),
        where class_names is the ``classes`` list of the training dataset.

    Example usage:
        train_dataloader, val_dataloader, class_names = create_dataloader(
            dataset_path=Path('path/to/dataset'),
            batch_size=32,
            train_transform=some_train_transform,
            val_transform=some_val_transform,
            num_workers=4)
    """
    train_path = dataset_path / 'train'
    val_path = dataset_path / 'val'

    train_data = ImageFolder(
        root= train_path,
        transform= train_transform
    )

    val_data = ImageFolder(
        root= val_path,
        transform= val_transform
    )

    # Only the training data is shuffled; validation order stays fixed.
    train_dataloader = DataLoader(
        dataset= train_data,
        batch_size= batch_size,
        shuffle= True,
        num_workers= num_workers
    )

    val_dataloader = DataLoader(
        dataset= val_data,
        batch_size= batch_size,
        shuffle= False,
        num_workers= num_workers
    )

    class_names = train_data.classes
    return train_dataloader, val_dataloader, class_names

Overwriting model/data_setup.py


In [10]:
%%writefile model/model_builder.py
import torch
from torch import nn
from torchvision.models import efficientnet_v2_s, EfficientNet_V2_S_Weights

def EfficientNet_V2_S(class_names: list, device=None):
    """Builds an EfficientNetV2-S with a classification head sized for ``class_names``.

    The network is created with ``EfficientNet_V2_S_Weights.DEFAULT`` weights and
    its ``classifier`` is replaced by Dropout(p=0.2) followed by a Linear layer
    with ``len(class_names)`` outputs.

    Args:
        class_names: List of target classes; only its length is used.
        device: Device to place the model on. When None, 'cuda' is used if
            ``torch.cuda.is_available()`` and 'cpu' otherwise.

    Returns:
        The model, with the new classifier, moved to ``device``.
    """
    # This module has no global `device`, so resolve one here rather than
    # relying on a name that only exists in the notebook.
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    weights = EfficientNet_V2_S_Weights.DEFAULT
    model = efficientnet_v2_s(weights= weights)

    # Read the feature width from the stock head (Dropout, Linear) instead of
    # hard-coding it.
    in_features = model.classifier[1].in_features

    model.classifier = nn.Sequential(
        nn.Dropout(p=0.2, inplace=True),
        nn.Linear(in_features=in_features, out_features=len(class_names), bias=True)
    )

    # Move to the device only after swapping the head, so the new layers are
    # moved together with the backbone.
    return model.to(device)

Overwriting model/model_builder.py


In [None]:
import torch
from torch import nn
import torchinfo

from torchvision.models import efficientnet_v2_s, EfficientNet_V2_S_Weights

# Instantiate EfficientNetV2-S with the default weights and place it on `device`.
weights = EfficientNet_V2_S_Weights.DEFAULT
model = efficientnet_v2_s(weights=weights)
model = model.to(device)

# Disabled alternative: seeded, randomly initialised network.
# torch.manual_seed(42)
# torch.cuda.manual_seed(42)
# model = efficientnet_v2_s().to(device)

# Layer-by-layer overview for a single 3x384x384 input.
torchinfo.summary(
    model,
    input_size=(1, 3, 384, 384),
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

In [None]:
# auto_transforms = weights.transforms()
# auto_transforms

In [None]:
import torch
from torchvision import transforms

# Training pipeline: random crop resized to 224x224 plus random flips for augmentation.
train_transforms_data = transforms.Compose([
    # transforms.Resize(size= 128),
    transforms.RandomResizedCrop(size=(224, 224), antialias=True),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    # transforms.ToDtype(torch.float32, scale=True)
    transforms.ToTensor()
])

# Validation pipeline: deterministic, always yields 224x224 tensors.
val_transforms_data = transforms.Compose([
    # Resize with an int only matches the shorter edge to 224 and keeps the
    # aspect ratio, so non-square images would come out with different widths
    # or heights and could not be stacked into one batch.
    transforms.Resize(size= 224),
    # The centre crop makes every output 224x224; it is a no-op for images
    # that were already square.
    transforms.CenterCrop(size= 224),
    # transforms.ToDtype(torch.float32, scale=True)
    transforms.ToTensor()
])


In [None]:
# train_dataloader, val_dataloader, class_names = create_dataloader(dataset_path=dataset_path,
#                                                               batch_size=32,
#                                                               train_transform=auto_transforms,
#                                                               val_transform=auto_transforms)

In [None]:
# `create_dataloader` is only written to model/data_setup.py by the %%writefile
# cell; it is never defined in the kernel, so import it from that file here.
from model.data_setup import create_dataloader

train_dataloader, val_dataloader, class_names = create_dataloader(dataset_path=dataset_path,
                                                              batch_size=32,
                                                              train_transform=train_transforms_data,
                                                              val_transform=val_transforms_data)

In [None]:
# Display the model's current classification head.
model.classifier

In [None]:
# for parameter in model.features.parameters():
#     parameter.requires_grad = False

# Replace the classification head with one that has a single output per class.
# Freshly built layers start on the CPU, so move the new head to `device`
# explicitly to keep it alongside the rest of the model.
model.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(in_features=1280, out_features=len(class_names), bias=True)
).to(device)

# Re-print the summary to confirm the new output size.
torchinfo.summary(model= model,
                  input_size= (1, 3, 384, 384),
                  col_names=["input_size", "output_size", "num_params", "trainable"],
                  col_width=20,
                  row_settings=["var_names"])

In [None]:
def __train(model: torch.nn.Module,
            dataloader: torch.utils.data.DataLoader,
            loss_func: torch.nn.Module,
            optimizer: torch.optim.Optimizer,
            mectric_func: Accuracy,
            device= device):
    """Runs one training pass over ``dataloader`` and updates ``model`` in place.

    Args:
        model: Network to train; switched to training mode.
        dataloader: Yields ``(X, y)`` batches.
        loss_func: Called as ``loss_func(model(X), y)``.
        optimizer: Stepped once per batch.
        mectric_func: Metric called as ``mectric_func(model(X), y)`` on the raw
            model outputs; its ``.item()`` value is accumulated.
        device: Device each batch is moved to before the forward pass.

    Returns:
        A tuple ``(train_loss, train_acc)``: the per-batch loss and metric
        values averaged over ``len(dataloader)`` batches.
    """
    train_loss = 0
    train_acc = 0

    model.train()

    for X, y in tqdm(dataloader, desc= '-----Train'):
        X, y = X.to(device), y.to(device)

        y_pred = model(X)
        loss = loss_func(y_pred, y)

        train_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The metric is fed the raw outputs directly; the former per-step
        # softmax/argmax result was never used and has been removed.
        train_acc += mectric_func(y_pred, y).item()

    train_loss /= len(dataloader)
    train_acc /= len(dataloader)

    return train_loss, train_acc

def __val(model: torch.nn.Module,
           dataloader: torch.utils.data.DataLoader,
           loss_func: torch.nn.Module,
           mectric_func: Accuracy,
           device= device):
    """Evaluates ``model`` on ``dataloader`` without updating its weights.

    Args:
        model: Network to evaluate; switched to eval mode.
        dataloader: Yields ``(X, y)`` batches.
        loss_func: Called as ``loss_func(model(X), y)``.
        mectric_func: Metric called as ``mectric_func(model(X), y)`` on the raw
            model outputs; its ``.item()`` value is accumulated.
        device: Device each batch is moved to before the forward pass.

    Returns:
        A tuple ``(val_loss, val_acc)``: the per-batch loss and metric values
        averaged over ``len(dataloader)`` batches.
    """
    val_loss = 0
    val_acc = 0

    model.eval()

    # The whole pass runs under inference mode, so no gradients are tracked.
    with torch.inference_mode():
        for X, y in tqdm(dataloader, desc= '-------Val'):
            X, y = X.to(device), y.to(device)

            y_pred = model(X)
            loss = loss_func(y_pred, y)

            val_loss += loss.item()

            # The metric is fed the raw outputs directly; the former per-step
            # softmax/argmax result was never used and has been removed.
            val_acc += mectric_func(y_pred, y).item()

    val_loss /= len(dataloader)
    val_acc /= len(dataloader)

    return val_loss, val_acc

In [None]:
def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          val_dataloader: torch.utils.data.DataLoader,
          loss_func: torch.nn.Module,
          optimizer: torch.optim.Optimizer,
          mectric_func: Accuracy,
          epochs: int,
          device= device):
    """Trains and validates ``model`` for ``epochs`` epochs.

    Each epoch runs one ``__train`` pass followed by one ``__val`` pass and
    prints the resulting loss and metric values.

    Args:
        model: Network to train.
        train_dataloader: Batches used for the training pass.
        val_dataloader: Batches used for the validation pass.
        loss_func: Loss passed through to both passes.
        optimizer: Optimizer passed through to the training pass.
        mectric_func: Metric passed through to both passes.
        epochs: Number of epochs to run.
        device: Device forwarded to both passes.

    Returns:
        A dict with keys 'train_loss', 'train_acc', 'val_loss' and 'val_acc',
        each holding a list with one value per epoch.
    """
    results = {
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': [],
    }

    # Seed the CPU and CUDA generators once, before the first epoch.
    torch.manual_seed(42)
    torch.cuda.manual_seed(42)

    for epoch in tqdm(range(epochs), desc= 'Epoch'):
        print(f"\n\nEpoch: {epoch+1:2} ------------")
        # Forward `device` explicitly; otherwise the helpers silently fall
        # back to their own default and ignore the caller's choice.
        train_loss, train_acc = __train(model=model,
                                        dataloader=train_dataloader,
                                        loss_func=loss_func,
                                        optimizer=optimizer,
                                        mectric_func=mectric_func,
                                        device=device)

        val_loss, val_acc = __val(model=model,
                                  dataloader=val_dataloader,
                                  loss_func=loss_func,
                                  mectric_func=mectric_func,
                                  device=device)

        print(f"Epoch: {epoch+1:2} | Train Loss: {train_loss:.5f} | Train Acc: {train_acc:.4f} | Val Loss: {val_loss:.5f} | Val Acc: {val_acc:.4f}")
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["val_loss"].append(val_loss)
        results["val_acc"].append(val_acc)

    return results

In [None]:
# Multiclass accuracy, kept on the same device as the tensors it will receive.
mectric_func = Accuracy(task='multiclass', num_classes=len(class_names)).to(device)

# Cross-entropy loss, optimised with SGD over every model parameter.
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

# Run the full train/validate loop and keep the per-epoch history.
results = train(
    model,
    train_dataloader,
    val_dataloader,
    loss_func,
    optimizer,
    mectric_func,
    epochs=25,
)

In [None]:
import os
from pathlib import Path
import matplotlib.pyplot as plt
import torch

def save_model(model: torch.nn.Module,
               graph):
    """Saves the model weights and a figure into a new run directory.

    A directory ``runs/classify/train<i>`` is created, where ``<i>`` is the
    smallest non-negative integer for which no such entry exists yet. The
    model's ``state_dict()`` is written there as ``model.pth`` and
    ``graph.savefig`` is called with the path of ``loss.jpg`` in the same
    directory.

    Args:
        model: Module whose ``state_dict()`` is saved with ``torch.save``.
        graph: Any object exposing ``savefig(path)``, e.g. ``matplotlib.pyplot``
            or a Figure.
    """
    target_dir = Path('runs/classify/')
    model_name = 'model.pth'
    graph_name = 'loss.jpg'

    # Create the base directory first so the very first run does not crash.
    target_dir.mkdir(parents=True, exist_ok=True)

    # Pick the first unused run index.
    i = 0
    while (target_dir / f'train{i}').exists():
        i += 1

    target_dir_path = target_dir / f'train{i}'
    target_dir_path.mkdir(parents=True,
                          exist_ok=True)

    model_save_path = target_dir_path / model_name
    graph_save_path = target_dir_path / graph_name

    print(f"[INFO] Saving model to: {target_dir_path}")
    # Pass the path variable itself; the quoted name wrote a file literally
    # called 'graph_save_path' into the working directory.
    graph.savefig(graph_save_path)
    torch.save(obj=model.state_dict(),
               f=model_save_path)

In [None]:
# Pass the actual network: save_model calls model.state_dict(), which the
# placeholder value 123 does not provide.
save_model(model= model,
           graph= plt)

In [None]:
import matplotlib.pyplot as plt
def plot_loss_curves(results: dict[str, list[float]]):
    """Plots training/validation loss and accuracy curves side by side.

    Args:
        results: Dict with the keys 'train_loss', 'val_loss', 'train_acc' and
            'val_acc', each mapping to a list with one value per epoch.

    Returns:
        The ``matplotlib.pyplot`` module, with the new figure as its current
        figure, so the caller can show or save it.
    """
    # Get the loss values of the results dictionary (training and val)
    loss = results['train_loss']
    val_loss = results['val_loss']  # was missing, which made the val plot raise NameError

    # Get the accuracy values of the results dictionary (training and val)
    accuracy = results['train_acc']
    val_accuracy = results['val_acc']

    # Figure out how many epochs there were
    epochs = range(len(results['train_loss']))

    # Setup a plot
    plt.figure(figsize=(15, 7))

    # Plot loss
    plt.subplot(1, 2, 1)
    plt.plot(epochs, loss, label='train_loss')
    plt.plot(epochs, val_loss, label='val_loss')
    plt.title('Loss')
    plt.xlabel('Epochs')
    plt.legend()

    # Plot accuracy
    plt.subplot(1, 2, 2)
    plt.plot(epochs, accuracy, label='train_accuracy')
    plt.plot(epochs, val_accuracy, label='val_accuracy')
    plt.title('Accuracy')
    plt.xlabel('Epochs')
    plt.legend()
    return plt

In [None]:
# plot_loss_curves(results)