## Tutorial 3: Training Faster-RCNN model for Chest X-ray Abnormalities Detection

### Introduction

This is a quick walkthrough notebook that demonstrates how to train a Faster R-CNN detection model on the VinBigData Chest X-ray Abnormalities Detection dataset using PyTorch.

### Table of contents

- [Imports](#Imports)
- [Load VinBigData Chest X-ray Abnormalities Detection Dataset](#Load-VinBigData-Chest-X-ray-Abnormalities-Detection-Dataset)
- [Define dataloaders](#Define-dataloaders)
- [Define helper class for loss calculation](#Define-helper-class-for-loss-calculation)
- [Load model](#Load-model)
- [Start training loop](#Start-training-loop)

#### Imports

In [1]:
import os
import time
import warnings

import torch
import torch.nn as nn
import torchvision
from torch.utils.data import DataLoader
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

from farabio.data.biodatasets import VinBigDataset

warnings.filterwarnings('ignore')

#### Load VinBigData Chest X-ray Abnormalities Detection Dataset

In [2]:
# Root directory of the already-downloaded dataset (download=False below).
# Set the VINBIGDATA_ROOT environment variable to point at your own copy;
# the fallback is the path used when this tutorial was written.
_path = os.environ.get(
    "VINBIGDATA_ROOT",
    "/home/data/07_SSD4TB/public-datasets/vinbigdata-chest-xray-abnormalities-detection",
)

# Training and validation splits read from the same root, with no extra transform.
train_dataset = VinBigDataset(_path, transform=None, download=False, mode="train", show=False)
valid_dataset = VinBigDataset(_path, transform=None, download=False, mode="val", show=False)

#### Define dataloaders

In [3]:
def collate_fn(batch):
    """Regroup a batch of per-sample tuples into per-field tuples.

    Args:
        batch: iterable of samples, each itself an iterable of fields.

    Returns:
        A tuple with one entry per field; entry ``i`` is a tuple holding
        field ``i`` of every sample, in batch order. An empty batch gives
        an empty tuple. Fields are grouped with ``zip``, so nothing is
        stacked or padded.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Use up to 32 loader worker processes, but never more than the CPUs this
# machine reports, so smaller hosts are not oversubscribed.
num_workers = min(32, os.cpu_count() or 1)

# Training loader: reshuffled every epoch.
train_data_loader = DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=num_workers,
    collate_fn=collate_fn
)

# Validation loader: fixed order.
valid_data_loader = DataLoader(
    valid_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=num_workers,
    collate_fn=collate_fn
)

#### Define helper class for loss calculation

In [4]:
class Averager:
    """Running arithmetic mean of the values passed to :meth:`send`."""

    def __init__(self):
        # Sum of all values received and how many were received.
        self.current_total, self.iterations = 0.0, 0.0

    def send(self, value):
        """Add ``value`` to the running total and count one more sample."""
        self.iterations += 1
        self.current_total += value

    @property
    def value(self):
        """Mean of the values sent since the last reset; ``0`` if none were sent."""
        if self.iterations != 0:
            return 1.0 * self.current_total / self.iterations
        return 0

    def reset(self):
        """Forget every value received so far."""
        self.current_total, self.iterations = 0.0, 0.0

#### Load model

In [5]:
# Train on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
multi_gpu = True
num_classes = train_dataset.num_classes + 1  # + 1 for background

# Start from the pretrained Faster R-CNN and swap its box predictor for one
# sized to this dataset's classes.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Wrap the model for data-parallel execution when requested and possible.
if multi_gpu and torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = nn.DataParallel(model)

model.to(device)

# Optimise only the trainable parameters; they are collected after the
# optional DataParallel wrap above.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)
# Multiply the learning rate by 0.1 every 4 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=4,
    gamma=0.1,
)

Let's use 6 GPUs!


#### Start training loop

In [6]:
num_epochs = 10  # increase to train for longer
loss_hist = Averager()  # running mean of the iteration losses within one epoch
itr = 1  # iteration counter across all epochs, starting at 1
lossHistoryiter = []  # total loss of every iteration
lossHistoryepoch = []  # mean loss of every epoch

start = time.time()

for epoch in range(num_epochs):
    # torchvision detection models return the loss dict only in training
    # mode, so set it explicitly instead of relying on the default.
    model.train()
    loss_hist.reset()

    for images, targets, image_ids in train_data_loader:

        # Move every image and every value of each target dict to the device.
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)

        # Reduce each loss term to a scalar before summing. When the model is
        # wrapped in nn.DataParallel the gathered per-replica losses can be
        # 1-D, which would break .item() and .backward(); .mean() leaves an
        # already-scalar loss unchanged.
        losses = sum(loss.mean() for loss in loss_dict.values())
        loss_value = losses.item()

        loss_hist.send(loss_value)
        lossHistoryiter.append(loss_value)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if itr % 50 == 0:
            print(f"Iteration #{itr} loss: {loss_value}")

        itr += 1

    # update the learning rate once per epoch
    if lr_scheduler is not None:
        lr_scheduler.step()
    lossHistoryepoch.append(loss_hist.value)
    print(f"Epoch #{epoch} loss: {loss_hist.value}")

# Report the wall-clock training time as HH:MM:SS.ss.
end = time.time()
hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)
print("Time taken to Train the model :{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

Iteration #50 loss: 0.3085425794124603
Iteration #100 loss: 0.5478842258453369
Iteration #150 loss: 0.4258052706718445
Iteration #200 loss: 0.2777027189731598
Iteration #250 loss: 0.2505597174167633
Epoch #0 loss: 0.3619921253859133
Iteration #300 loss: 0.31739604473114014
Iteration #350 loss: 0.41544443368911743
Iteration #400 loss: 0.30315133929252625
Iteration #450 loss: 0.3148651719093323
Iteration #500 loss: 0.24818390607833862
Iteration #550 loss: 0.3079833388328552
Epoch #1 loss: 0.347546909922275
Iteration #600 loss: 0.3068886399269104
Iteration #650 loss: 0.38821980357170105
Iteration #700 loss: 0.2054576724767685
Iteration #750 loss: 0.3481389284133911
Iteration #800 loss: 0.2565847933292389
Epoch #2 loss: 0.33110840413449466
Iteration #850 loss: 0.3899312913417816
Iteration #900 loss: 0.35357269644737244
Iteration #950 loss: 0.32706546783447266
Iteration #1000 loss: 0.3869641125202179
Iteration #1050 loss: 0.21673503518104553
Iteration #1100 loss: 0.338126540184021
Epoch #3 