In [1]:
import os
import torch
import time
import numpy as np
import torchvision.transforms as transforms

from torch.optim.lr_scheduler import StepLR
from torchvision import datasets, models
from torch import nn, optim
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split

Config

In [2]:
# Tunable settings for data loading, gathered in one place.
DATA_DIR = "./data"
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 64
TRAIN_FRACTION = 0.8
SEED = 42  # Seeds the train/validation split so it is identical across kernel restarts

# Resize every image to IMAGE_SIZE, convert it to a tensor and normalise each
# channel with the usual ImageNet mean/std.
transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# ImageFolder derives the class list from the sub-directories of DATA_DIR.
all_data = ImageFolder(DATA_DIR, transform=transform)
num_classes = len(all_data.classes)

# Compute the split sizes once; the validation set takes the remainder so the
# two sizes always add up to len(all_data).
train_size = int(len(all_data) * TRAIN_FRACTION)
validation_size = len(all_data) - train_size

# A dedicated, seeded generator makes the split reproducible. Without it a
# re-run draws a different validation set, so validation losses (and the
# checkpoint selected from them) are not comparable between runs.
train_data, validation_data = random_split(
    all_data,
    [train_size, validation_size],
    generator=torch.Generator().manual_seed(SEED),
)

train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
validation_loader = DataLoader(validation_data, batch_size=BATCH_SIZE, shuffle=False)


In [3]:
# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")
# Compare the device *type*: a torch.device does not compare equal to the
# plain string 'cuda', so the previous `device == 'cuda'` check never fired
# and the GPU name was never printed.
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))

Device: cuda


In [4]:
# Load a pretrained ResNet-50 and replace its final fully connected layer with
# one that outputs `num_classes` logits, then move the network to `device`.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; switch once the minimum torchvision version is confirmed.
model = models.resnet50(pretrained=True)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_classes)
model.to(device)



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

Training

In [5]:
# Loss, optimiser and learning-rate schedule used by the training loop.
criterion = nn.CrossEntropyLoss()
# Every parameter of the model is optimised (no layers are frozen here).
optimizer = optim.Adam(params=model.parameters(), lr=1e-5)
# Multiply the learning rate by 0.9 each time scheduler.step() is called.
scheduler = StepLR(optimizer=optimizer, step_size=1, gamma=0.9)

In [6]:
# Best validation loss seen so far. It lives at module level so it persists
# across calls to train_model; re-running this cell resets it to infinity.
best_loss = float('inf')


def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over `loader` and return the mean of the per-batch losses.

    When `optimizer` is given the pass trains the model (zero_grad, backward
    and step on every batch). When it is None the pass only evaluates, with
    gradient tracking disabled. The caller is responsible for putting the
    model into train/eval mode beforehand.

    Raises:
        ValueError: if `loader` yields no batches.
    """
    training = optimizer is not None
    total_loss = 0.0
    num_batches = 0

    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            loss = criterion(model(images), labels)
            if training:
                loss.backward()
                optimizer.step()
            total_loss += loss.item()
            num_batches += 1

    if num_batches == 0:
        raise ValueError('Cannot compute an average loss over an empty data loader')
    return total_loss / num_batches


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs,
                scheduler=None, patience=5, checkpoint_path='resnet_best_model.pt'):
    """Train `model` with per-epoch validation, checkpointing and early stopping.

    Each epoch trains on `train_loader`, evaluates on `val_loader`, steps the
    learning-rate scheduler (if any) and prints a one-line summary. Whenever
    the validation loss drops below the module-level `best_loss`, the model's
    state_dict is saved to `checkpoint_path`. Training stops early once the
    validation loss has failed to improve for `patience` consecutive epochs.

    Args:
        model: network to train; batches are moved to the device of its
            first parameter (CPU if it has no parameters).
        train_loader: iterable of (images, labels) training batches.
        val_loader: iterable of (images, labels) validation batches.
        criterion: loss callable invoked as criterion(outputs, labels).
        optimizer: optimiser updating the model's parameters.
        num_epochs: maximum number of epochs to run.
        scheduler: optional LR scheduler stepped once per epoch. When omitted,
            a module-level variable named `scheduler` is used if one exists,
            which keeps existing calls that relied on that global working.
        patience: epochs without improvement tolerated before stopping.
        checkpoint_path: file the best state_dict is written to.

    Raises:
        ValueError: if either loader yields no batches.
    """
    global best_loss  # Shared with the notebook so the best score survives repeated calls

    if scheduler is None:
        # Backward compatibility: earlier versions read the notebook-level
        # `scheduler` implicitly instead of taking it as an argument.
        scheduler = globals().get('scheduler')

    # Use the model's own device rather than depending on a global `device`.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    patience_counter = 0

    model.train()  # Ensure the model is in training mode
    for epoch in range(num_epochs):
        # Training phase; only this phase is included in the reported time.
        start_time = time.time()
        avg_train_loss = _run_epoch(model, train_loader, criterion, device, optimizer)
        epoch_time = time.time() - start_time

        # Validation phase
        model.eval()
        avg_val_loss = _run_epoch(model, val_loader, criterion, device)

        if scheduler is not None:
            scheduler.step()  # Update the learning rate once per epoch

        print(f'Epoch {epoch+1}| Train Loss: {np.round(avg_train_loss, 6)} | Val Loss: {np.round(avg_val_loss, 6)} | Time: {np.round(epoch_time, 2)}s')

        # Checkpoint on improvement, otherwise count towards early stopping.
        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            patience_counter = 0
            torch.save(model.state_dict(), checkpoint_path)
            print('New best model saved!')
        else:
            patience_counter += 1

        # Early stopping check; on this path the model is left in eval mode.
        if patience_counter >= patience:
            print(f'Early stopping triggered after {epoch+1} epochs!')
            break

        model.train()  # Set the model back to training mode

In [7]:
# Upper bound on the number of epochs; early stopping may end the run sooner.
epochs = 50
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=validation_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=epochs,
)

Epoch 1| Train Loss: 1.988978 | Val Loss: 1.747088 | Time: 21.37s
New best model saved!
Epoch 2| Train Loss: 1.619453 | Val Loss: 1.471353 | Time: 19.8s
New best model saved!
Epoch 3| Train Loss: 1.374122 | Val Loss: 1.284491 | Time: 19.72s
New best model saved!
Epoch 4| Train Loss: 1.168655 | Val Loss: 1.14108 | Time: 19.92s
New best model saved!
Epoch 5| Train Loss: 1.015427 | Val Loss: 1.028392 | Time: 20.22s
New best model saved!
Epoch 6| Train Loss: 0.888236 | Val Loss: 0.94664 | Time: 20.05s
New best model saved!
Epoch 7| Train Loss: 0.786572 | Val Loss: 0.877004 | Time: 20.12s
New best model saved!
Epoch 8| Train Loss: 0.708386 | Val Loss: 0.820158 | Time: 20.06s
New best model saved!
Epoch 9| Train Loss: 0.646757 | Val Loss: 0.772682 | Time: 20.15s
New best model saved!
Epoch 10| Train Loss: 0.580986 | Val Loss: 0.733549 | Time: 20.28s
New best model saved!
Epoch 11| Train Loss: 0.514845 | Val Loss: 0.702811 | Time: 20.52s
New best model saved!
Epoch 12| Train Loss: 0.474098 | 