# Building a custom object detection model with Resnet FRCNN as backbone

In [1]:
import os
import json
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image
from torchvision.transforms import Compose, Resize, ToTensor, Normalize
import torch.nn as nn
import torchvision.models.detection as detection
import torchvision.transforms as transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.roi_heads import RoIHeads
from torchvision.ops import MultiScaleRoIAlign
from torchvision.models.detection.faster_rcnn import TwoMLPHead, FastRCNNPredictor
from torchvision.models.detection.faster_rcnn import fasterrcnn_resnet50_fpn
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.ops import MultiScaleRoIAlign
from torch.utils.data import random_split
from torch.utils.tensorboard import SummaryWriter
%load_ext tensorboard
import tensorflow as tf
import datetime


from LoadingBMWDataset import CustomObjectDetectionDataset

torch.cuda.empty_cache()
torch.cuda.is_available()

2024-03-14 20:54:01.002263: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-14 20:54:01.112917: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


True

In [2]:
# Specify the image and annotation directories
img_dir = '/home/wgt/Desktop/InMind Academy/AI_Track/Amazing_Project/inmind_amazing_project/data/Training/images'
annotation_dir = '/home/wgt/Desktop/InMind Academy/AI_Track/Amazing_Project/inmind_amazing_project/data/Training/labels/json'

# Create a CustomObjectDetectionDataset object
dataset = CustomObjectDetectionDataset(img_dir, annotation_dir)

dataset_size = len(dataset)
train_size = int(dataset_size * 0.8)
validation_size = dataset_size - train_size

train_dataset, validation_dataset = random_split(dataset, [train_size, validation_size])


# The custom collate function will be used to stack the images and targets into a batch
def collate_fn(batch):
    images = [item[0] for item in batch]
    targets = [item[1] for item in batch]

    return images, targets

# When creating the DataLoaders for training and validation, pass the custom collate function
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
validation_loader = DataLoader(validation_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)

# Ensring that the dataset is loaded correctly
print(type(dataset[0][0]))

<class 'torch.Tensor'>


In [3]:
# Defining the custom model function to get the pre-trained Faster R-CNN model
def get_custom_model(num_classes):
    # Loading a pre-trained Faster R-CNN model
    model = fasterrcnn_resnet50_fpn(pretrained=True)
    
    # Getting the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    
    # Creating the custom layers
    # Note: It's crucial to match the input size of the first custom layer to 'in_features'
    custom_head_layers = nn.Sequential(
        nn.Linear(in_features, 512),
        nn.ReLU(),
        nn.Linear(512, 256),
        nn.ReLU(),
    )

    # Extending the box predictor to include custom layers before final classification and regression layers
    model.roi_heads.box_predictor = FastRCNNPredictor(1024, num_classes)
    
    return model

# Using the function to get the customized model
num_classes = 7 + 1  # 7 classes + background
model = get_custom_model(num_classes)




In [4]:
# Creating the optimizer based on the pre-trained model parameters and the modified custom layers
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(device)

# Moving the model to the device
model.to(device)

writer = SummaryWriter('runs/faster-rcnn-object-detection')

cuda


In [7]:
# Selecting the parameters to be optimized
params = [p for p in model.parameters() if p.requires_grad]

# Defining the optimizer, being the Stochastic Gradient Descent (SGD)
optimizer = torch.optim.SGD(params, lr=0.001, momentum=0.9, weight_decay=0.0005)

# Training loop
num_epochs = 35

for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    train_loss = 0
    
    # Training phase
    for i, (images, targets) in enumerate(train_loader):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        train_loss += losses.item()
        losses.backward()
        optimizer.step()
        writer.add_scalar('Loss/Train', losses.item(), epoch * len(train_loader) + i)

    print(f'Epoch: {epoch}, Loss: {losses.item()}')
    avg_train_loss = train_loss / len(train_loader)

    writer.add_scalar('Loss/train_avg', avg_train_loss, epoch)

    val_loss = 0
    with torch.no_grad():  # Disable gradient calculation
        for images, targets in validation_loader:
            images = list(img.to(device) for img in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Temporarily set the model to training mode to compute losses
            model.train()
            loss_dict = model(images, targets)
            model.eval()  # Set it back to evaluation mode
            
            losses = sum(loss for loss in loss_dict.values())
            val_loss += losses.item()

    avg_val_loss = val_loss / len(validation_loader)

print(f'Epoch: {epoch}, Training Loss: {avg_train_loss:.4f}, Validation Loss: {avg_val_loss:.4f}')

writer.close()

No boxes found for image: 821.jpg


  target['labels'] = torch.tensor(labels, dtype=torch.int64)


No boxes found for image: 678.jpg
No boxes found for image: 2433.jpg
Epoch: 0, Loss: 0.10246841609477997
No boxes found for image: 1154.jpg
No boxes found for image: 821.jpg
No boxes found for image: 2433.jpg
No boxes found for image: 678.jpg
Epoch: 1, Loss: 0.15769881010055542
No boxes found for image: 1154.jpg
No boxes found for image: 678.jpg
No boxes found for image: 2433.jpg
No boxes found for image: 821.jpg
Epoch: 2, Loss: 0.11370489746332169
No boxes found for image: 1154.jpg
No boxes found for image: 2433.jpg
No boxes found for image: 678.jpg
No boxes found for image: 821.jpg
Epoch: 3, Loss: 0.0877179205417633
No boxes found for image: 1154.jpg
No boxes found for image: 821.jpg
No boxes found for image: 2433.jpg
No boxes found for image: 678.jpg
Epoch: 4, Loss: 0.11658086627721786
No boxes found for image: 1154.jpg
No boxes found for image: 2433.jpg
No boxes found for image: 821.jpg
No boxes found for image: 678.jpg
Epoch: 5, Loss: 0.10396294295787811
No boxes found for image: 

## Saving the model's parameters for future use

In [8]:
# Saving the model's state dictionary
torch.save(model.state_dict(), 'customResnet50FasterRCNN.pth')


In [None]:
########################################################################################################
# To reload the model with the last saved state dictionary (or parameters), run the following code:    #
#                                                                                                      #
#                                                                                                      #
# Recreating the model instance to ensure the same architecture is used                                #
model = get_custom_model(num_classes)

# Loading the saved state dictionary into the model to pick up training from where we left off
model.load_state_dict(torch.load('model_state_dict.pth'))

# Setting the device to be used for training                                                           #
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
#                                                                                                      #
# Move the model to the desired device                                                                 #
model.to(device)
########################################################################################################