In [1]:
import os

# True when the notebook runs on Kaggle, False for a local checkout.
ON_Kaggle = False

# True to continue from the checkpoint stored at MODEL_SAVE_PATH instead of
# starting from a freshly built model.
RESUME = True

if ON_Kaggle:
    # Work from inside the attached dataset so that relative paths used later
    # in the notebook resolve against it.
    PATH = "dataset-dlacv-project"
    full_path = os.path.join("/kaggle/input", PATH)
    os.chdir(full_path)
    print(os.getcwd())

    # Outputs go to the Kaggle working directory.
    MODEL_SAVE_PATH = '/kaggle/working/models/best_model.pth'
    TB_SAVE_PATH = "/kaggle/working/logs/"
else:
    MODEL_SAVE_PATH = './models/best_model.pth'
    TB_SAVE_PATH = "./logs/"

# Create the output directories in both environments (previously only done on
# Kaggle, and only through shell magics), so a fresh local run has them too.
os.makedirs(os.path.dirname(MODEL_SAVE_PATH), exist_ok=True)
os.makedirs(TB_SAVE_PATH, exist_ok=True)

# Import relevant libraries for the project

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use('ggplot')

import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import DataLoader
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

from torchvision.datasets import ImageFolder
from torchvision.transforms import v2, ToTensor, Normalize
from torchvision.models import resnet18, ResNet18_Weights
from torchvision.transforms import Grayscale

from utils.torch_utils import train

# Preparation

In [3]:
# --- Run configuration -------------------------------------------------------
SEED = 42
EPOCHS = 1
BATCH_SIZE = 8
# Don't change this inside the Jupyter notebook (it will crash).
NUM_WORKERS = 0

# --- Labels ------------------------------------------------------------------
# Image classes; N_CLASSES sets the size of the one-hot targets and of the
# model's output layer.
CLASSES = ['ok', 'defective']
N_CLASSES = len(CLASSES)

In [4]:
# Dataset root plus one sub-directory per split; all paths are relative to the
# current working directory and keep their trailing slash.
dir_data = './data/'
dir_data_train, dir_data_val, dir_data_test = (
    f"{dir_data}{split_name}/" for split_name in ('train', 'val', 'test')
)

In [5]:
# Side length (in pixels) of the square images fed to ResNet18
RESNET_INPUT_SIDE = 224
# (height, width) passed to the resize / crop transforms
img_size = (RESNET_INPUT_SIDE, RESNET_INPUT_SIDE)

In [6]:
# Seed every random number generator used in this notebook with the same value:
# PyTorch (CPU), NumPy's global generator, and all CUDA devices.
for _seed_fn in (torch.manual_seed, np.random.seed, torch.cuda.manual_seed_all):
    _seed_fn(SEED)

In [None]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("active device:", device)

In [8]:
# Define Transforms
def _preprocessing_steps():
    """Return fresh instances of the steps shared by both pipelines.

    Converts the image to a tensor, resizes it to ``img_size`` with
    nearest-neighbour interpolation, reduces it to a single grayscale channel
    and normalises that channel with mean 0.5 and std 0.5.
    """
    return [
        ToTensor(),
        v2.Resize(img_size, interpolation=v2.InterpolationMode.NEAREST),
        Grayscale(num_output_channels=1),
        Normalize([0.5], [0.5]),
    ]


# Random augmentations, applied (in this order) after the shared preprocessing.
_augmentation_steps = [
    # small rotation plus a horizontal / vertical shift given as a fraction of
    # the total width / height
    v2.RandomAffine(degrees=15, translate=(0.1, 0.1), fill=1),
    v2.RandomHorizontalFlip(),
    v2.RandomVerticalFlip(),
    v2.RandomRotation(degrees=30, fill=1),
    v2.RandomResizedCrop(size=img_size, scale=(0.8, 1.0), ratio=(0.75, 1.33)),
    v2.RandomErasing(p=0.5, scale=(0.02, 0.25), ratio=(0.3, 3.3), value="random"),
    v2.GaussianBlur(kernel_size=(5, 5), sigma=(0.1, 2.0)),
]

# Deterministic pipeline (used for validation and test data)
transform = v2.Compose(_preprocessing_steps())

# Same preprocessing followed by the random augmentations (used for training data)
transform_and_augment = v2.Compose(_preprocessing_steps() + _augmentation_steps)

def target_transform(x):
    """Turn an integer class index into a float one-hot vector.

    Args:
        x: Integer class index in ``[0, N_CLASSES)``.

    Returns:
        1-D float tensor of length ``N_CLASSES`` with a 1.0 at position ``x``.
    """
    class_index = torch.tensor([x], dtype=torch.long)
    encoded = F.one_hot(class_index, num_classes=N_CLASSES)
    # Drop the leading length-1 dimension introduced by wrapping x in a list.
    return encoded.squeeze(0).to(torch.float32)

In [9]:
# NOTE(review): ImageFolder derives the integer labels from the class
# sub-folder names — confirm that train_dataset.class_to_idx agrees with the
# order of CLASSES before interpreting predictions.

# Load train data (augmented, reshuffled every epoch)
train_dataset = ImageFolder(
    root=dir_data_train,
    transform=transform_and_augment,
    target_transform=target_transform,
)
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
)

# Load validation data.
# Evaluation does not benefit from shuffling; a fixed order keeps validation
# batches reproducible and does not consume random state between epochs.
val_dataset = ImageFolder(
    root=dir_data_val,
    transform=transform,
    target_transform=target_transform,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)

# Load test data (one image per batch, fixed order)
test_dataset = ImageFolder(
    root=dir_data_test,
    transform=transform,
    target_transform=target_transform,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=NUM_WORKERS,
)

In [10]:
# Define the model
class CustomResNet(nn.Module):
    """Pre-trained ResNet18 with an adapted input layer and a new output layer.

    Args:
        num_classes: Number of outputs of the final fully connected layer.
        input_channels: Number of channels of the input images.

    Raises:
        ValueError: If ``input_channels`` is smaller than 1.
    """

    def __init__(self, num_classes= 2, input_channels= 1):
        super().__init__()

        if input_channels < 1:
            raise ValueError(f"input_channels must be >= 1, got {input_channels}")

        # load the pre-trained ResNet18 model
        self.base_model = resnet18(weights= ResNet18_Weights.DEFAULT)

        # change the input channels of the model, but keep what the first
        # layer has already learned instead of restarting it from random
        # weights underneath an otherwise pre-trained network
        pretrained_conv1 = self.base_model.conv1
        if input_channels != pretrained_conv1.in_channels:
            adapted_conv1 = nn.Conv2d(
                input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False
            )
            with torch.no_grad():
                adapted_conv1.weight.copy_(
                    self._adapt_conv1_weight(pretrained_conv1.weight, input_channels)
                )
            self.base_model.conv1 = adapted_conv1

        # # Freeze all layers except the last one
        # for param in self.base_model.parameters():
        #     param.requires_grad = False

        # change the output layer of the model
        self.base_model.fc = nn.Linear(self.base_model.fc.in_features, num_classes)

    @staticmethod
    def _adapt_conv1_weight(pretrained_weight, input_channels):
        """Re-shape pre-trained first-layer filters for ``input_channels`` inputs.

        Args:
            pretrained_weight: Filter tensor of shape
                ``(out_channels, in_channels, kH, kW)``.
            input_channels: Desired number of input channels.

        Returns:
            A new tensor of shape ``(out_channels, input_channels, kH, kW)``.
            If the channel count already matches, a copy of the input is
            returned. Otherwise the filters are summed over the channel axis
            and spread evenly over the new channels, so an image whose channels
            are all identical produces the same response as it would with the
            original filters.
        """
        weight = pretrained_weight.detach()
        if input_channels == weight.shape[1]:
            return weight.clone()
        collapsed = weight.sum(dim=1, keepdim=True)
        return collapsed.repeat(1, input_channels, 1, 1) / input_channels

    def forward(self, x):
        """Run ``x`` through the adapted ResNet18 and return its raw outputs."""
        return self.base_model(x)

In [None]:
# Build (or restore) the model first and only then create the optimizer.
# The optimizer must be constructed from the parameters of the model that is
# actually trained; creating it before `model` is rebound to a loaded
# checkpoint would leave it updating the discarded model's parameters.
if RESUME:
    # SECURITY: weights_only=False unpickles arbitrary Python objects, which
    # can execute code. Only load checkpoint files you created yourself.
    model = torch.load(MODEL_SAVE_PATH, weights_only= False, map_location= device)
else:
    model = CustomResNet(num_classes= N_CLASSES, input_channels= 1)

model = model.to(device)

# Check which parameters are trainable
for name, param in model.named_parameters():
    print(f"{name} requires_grad={param.requires_grad}")

# Only parameters that require gradients are handed to the optimizer.
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)

print(model)

# Train

In [12]:
# Loss function used for both training and validation.
criterion = nn.CrossEntropyLoss()

# Multiply the learning rate by `factor` after `patience` epochs without an
# improvement of the monitored (minimised) metric, down to `min_lr`.
#
# `eps` is the smallest learning-rate change the scheduler will apply: a
# reduction smaller than `eps` is ignored. It was 5e-3, which is larger than
# the learning rate the optimizer is created with (0.001), so every reduction
# was ignored and the learning rate never changed. It is now below `min_lr`
# so that every step down to `min_lr` takes effect.
scheduler = ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=6,
    eps=1e-10,
    min_lr=5e-9,
)

In [None]:
# Run training with the project's `train` helper and keep its return value.
# NOTE(review): start_epoch is hard-coded to 100 while EPOCHS is 1 — presumably
# this continues the epoch numbering of an earlier run; confirm against
# utils.torch_utils.train and adjust when not resuming.
history = train(
    model=model,
    epochs=EPOCHS,
    optimizer=optimizer,
    criterion=criterion,
    train_loader=train_loader,
    val_loader=val_loader,
    device=device,
    scheduler=scheduler,
    save_best_path=MODEL_SAVE_PATH,
    tb_log_dir=TB_SAVE_PATH,
    start_epoch=100,
)