# Sea Victims Detection - Faster RCNN Training

https://www.kaggle.com/code/ubiratanfilho/sea-victims-detection-faster-rcnn

## Dependencies

In [None]:
# Install the COCO API; it provides the annotation loader (pycocotools.coco)
# and the COCOeval metrics imported in the next cell.
!pip install pycocotools

In [None]:
import time
import os
import argparse
import json

import pycocotools.coco as pyco
from pycocotools.cocoeval import COCOeval

from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches

import torch
from torchvision.transforms import Compose, ToTensor, Resize
from torchvision.models.detection.faster_rcnn import FasterRCNN
from torchvision.models import resnet18, resnet50, resnet101,\
    ResNet101_Weights, ResNet18_Weights, ResNet50_Weights
from torchvision.models.detection.anchor_utils import AnchorGenerator
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torch.optim import Adam
from torch.optim.lr_scheduler import StepLR

## Dataset Class

In [None]:
class SDSDataset(Dataset):
    """COCO-annotated detection dataset resized to a fixed resolution.

    Each item is an ``(image, annotation)`` pair. ``image`` is the output of
    ``Resize(resize)`` followed by ``ToTensor()``. ``annotation`` is a dict
    with the keys ``boxes`` (float32, ``[N, 4]`` as xmin/ymin/xmax/ymax in
    resized-image pixels), ``labels`` (int64 COCO ``category_id`` values),
    ``image_id``, ``area``, ``iscrowd``, ``org_h`` and ``org_w``.

    Args:
        root: Directory that contains the image files.
        annotation_file: Path to the COCO-format annotation JSON.
        resize: Target size as ``(height, width)``; it is indexed as
            ``resize[0]`` (height) and ``resize[1]`` (width) when the boxes
            are rescaled, so it must be a 2-element sequence.
    """

    def __init__(self, root, annotation_file, resize):
        self.root = root
        self.coco = pyco.COCO(annotation_file)
        self.ids = list(self.coco.imgs.keys())
        # NOTE(review): torchvision's FasterRCNN counts the background as a
        # class. This value is only correct if the category list already has
        # an entry for id 0 - confirm against the annotation file.
        self.num_classes = len(self.coco.cats)
        self.resize = resize
        self.transform = Compose([
            Resize(resize),
            ToTensor(),
        ])

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, index):
        """Return the resized image tensor and its annotation dict."""
        coco = self.coco

        img_id = self.ids[index]
        coco_annotation = coco.loadAnns(coco.getAnnIds(img_id))
        path = coco.loadImgs(img_id)[0]['file_name']

        # Close the file handle as soon as the pixels are decoded, and force
        # three channels so grayscale/RGBA files do not break the backbone.
        with Image.open(os.path.join(self.root, path)) as org_image:
            org_width, org_height = org_image.size
            image = self.transform(org_image.convert('RGB'))

        # Factors that map original pixel coordinates onto the resized image
        scale_x = self.resize[1] / org_width
        scale_y = self.resize[0] / org_height

        # Coco format: bbox = [xmin, ymin, width, height]
        # Pytorch format: bbox = [xmin, ymin, xmax, ymax]
        boxes = []
        labels = []
        areas = []
        for ann in coco_annotation:
            x, y, w, h = ann['bbox']
            xmin = x * scale_x
            ymin = y * scale_y
            boxes.append([xmin, ymin, xmin + w * scale_x, ymin + h * scale_y])
            labels.append(ann['category_id'])
            # NOTE: area is taken from the annotation as-is, i.e. it is in
            # original-image pixels while the boxes are in resized pixels.
            areas.append(ann['area'])

        num_objects = len(coco_annotation)

        annotation = {
            # reshape keeps the [N, 4] layout even when the image has no
            # objects; torchvision detection models reject a (0,) tensor.
            'boxes': torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            'labels': torch.as_tensor(labels, dtype=torch.int64),
            'image_id': torch.tensor([img_id]),
            'area': torch.as_tensor(areas, dtype=torch.float32),
            # Every object is treated as a non-crowd instance
            'iscrowd': torch.zeros((num_objects,), dtype=torch.int64),
            # Original size, needed to map predictions back to the source image
            'org_h': torch.as_tensor(org_height, dtype=torch.int64),
            'org_w': torch.as_tensor(org_width, dtype=torch.int64),
        }

        return image, annotation

## Reading Data

In [None]:
# Image folders of the Kaggle "sds-dataset" input (the validation split is used as test set)
train_data_dir = '/kaggle/input/sds-dataset/compressed/images/train'
test_data_dir = '/kaggle/input/sds-dataset/compressed/images/val'
# COCO-format annotation files (despite the "_dir" suffix these are JSON file paths)
train_annotation_dir = '/kaggle/input/sds-dataset/compressed/annotations/instances_train.json'
test_annotation_dir = '/kaggle/input/sds-dataset/compressed/annotations/instances_val.json'

In [None]:
# Report whether a GPU is visible, then run on it when possible (CPU otherwise)
print(f'Cuda available: {torch.cuda.is_available()}')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Target image size; SDSDataset indexes it as (height, width)
resize = (256, 256)
# Number of images per batch for both data loaders
batch_size = 32

In [None]:
# Build one dataset per split; both are resized to the same resolution
train_dataset = SDSDataset(
    root=train_data_dir,
    annotation_file=train_annotation_dir,
    resize=resize,
)
test_dataset = SDSDataset(
    root=test_data_dir,
    annotation_file=test_annotation_dir,
    resize=resize,
)

In [None]:
# Create Dataloader
def collate_fn(batch):
    """Transpose a batch of samples into one tuple per sample field.

    A list of ``(image, annotation)`` pairs becomes ``(images, annotations)``
    without stacking, so every image keeps its own annotation dict.
    """
    columns = zip(*batch)
    return tuple(columns)

# Both loaders share the same batching options
loader_options = dict(
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
)
data_loader_train = DataLoader(train_dataset, **loader_options)
data_loader_test = DataLoader(test_dataset, **loader_options)

## Visualizing Image and Annotations

In [None]:
# Take one training sample and draw its ground-truth boxes over the image
image, annotations = train_dataset[1000]
# Move the first tensor axis to the end so matplotlib can display the image
image_np = image.permute(1, 2, 0).numpy()

fig, ax = plt.subplots(1)
ax.imshow(image_np)

# Ground truth as NumPy arrays
boxes = annotations['boxes'].numpy()
labels = annotations['labels'].numpy()

# One red outline plus its category id per object
for box, label in zip(boxes, labels):
    x1, y1, x2, y2 = box
    outline = patches.Rectangle(
        (x1, y1),
        width=x2 - x1,
        height=y2 - y1,
        linewidth=1,
        edgecolor='r',
        facecolor='none',
    )
    ax.add_patch(outline)
    plt.text(x1, y1, str(label), color='white')  # Optional: Add label text

plt.axis('off')
plt.show()

In [None]:
# Display the annotation dict of the sample loaded in the previous cell
annotations

## Training model

In [None]:
# ResNet-18 with its default pretrained weights, minus its last two child
# modules, serves as the feature extractor.
pretrained_resnet = resnet18(weights=ResNet18_Weights.DEFAULT)
modules = list(pretrained_resnet.children())[:-2]
backbone = nn.Sequential(*modules)
# FasterRCNN reads the number of feature channels from this attribute
backbone.out_channels = 512

In [None]:
# Anchor generator for the RPN: one group of sizes and one group of aspect
# ratios, because the backbone returns a single feature map.
anchor_sizes = ((8, 16, 32, 64, 128, 256, 512),)
anchor_aspect_ratios = ((0.5, 1.0, 2.0),)
anchor_generator = AnchorGenerator(
    sizes=anchor_sizes,
    aspect_ratios=anchor_aspect_ratios,
)

In [None]:
# Assemble the detector from the custom backbone and anchor generator.
# NOTE(review): torchvision counts the background as a class in num_classes;
# confirm that train_dataset.num_classes already accounts for it.
model = FasterRCNN(
    backbone,
    num_classes=train_dataset.num_classes,
    rpn_anchor_generator=anchor_generator,
)
# Move all parameters to the selected device
model.to(device)

print('')

In [None]:
# Display the model architecture as the cell output
model

In [None]:
# Initial learning rate passed to Adam
learning_rate = 0.0001
# Number of passes over the training set
num_epochs = 50

In [None]:
# Optimise only the trainable parameters; the learning rate is multiplied by
# 0.9 on every scheduler step.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = Adam(params, lr=learning_rate)
lr_scheduler = StepLR(optimizer, gamma=0.9, step_size=1)

In [None]:
class Evaluator:
    """Compute COCO bounding-box metrics for a JSON file of predictions.

    The ground truth is loaded once from ``annotation_dir`` (a COCO-format
    annotation JSON). Every call to :meth:`evaluate` loads a prediction file
    and runs ``COCOeval`` against that ground truth.
    """

    # Directory that relative prediction file names are resolved against.
    # Absolute paths passed to get_predictions()/evaluate() are used as-is.
    working_dir = '/kaggle/working'

    def __init__(self, annotation_dir):
        self.annotation_dir = annotation_dir

        self.coco = pyco.COCO(annotation_dir)
        self.image_ids = list(self.coco.imgs.keys())
        self.annotations = self.get_annotations()

        # Detection "dataset" sharing images/categories with the ground truth;
        # its annotations are filled in by evaluate().
        self.predictions = {
            "images": self.annotations["images"].copy(),
            "categories": self.annotations["categories"].copy(),
            "annotations": None
        }

    def get_annotations(self):
        """Load the ground-truth JSON and mark every annotation as non-crowd."""
        with open(self.annotation_dir, 'r') as f:
            data = json.load(f)

        for d in data['annotations']:
            d['iscrowd'] = 0

        return data

    def get_predictions(self, preds):
        """Load a prediction list and add the fields COCOeval requires.

        Args:
            preds: File name relative to ``working_dir``, or an absolute path.

        Returns:
            The list of prediction dicts, each extended with a 1-based ``id``,
            ``iscrowd = 0`` and ``area`` (width * height of its ``bbox``).
        """
        with open(os.path.join(self.working_dir, preds), 'r') as f:
            data = json.load(f)

        for new_id, d in enumerate(data, start=1):
            d['id'] = new_id
            d['iscrowd'] = 0
            d['area'] = d['bbox'][2] * d['bbox'][3]

        return data

    def evaluate(self, pred_file, n_imgs=-1):
        """Run COCO bbox evaluation and print the summary.

        Args:
            pred_file: Prediction JSON, resolved as in :meth:`get_predictions`.
            n_imgs: If positive, evaluate on a random subset of at most this
                many distinct images; otherwise use all images.

        Returns:
            The ``COCOeval`` object after evaluate/accumulate/summarize.
        """
        self.predictions["annotations"] = self.get_predictions(pred_file)

        coco_ds = pyco.COCO()
        coco_ds.dataset = self.annotations
        coco_ds.createIndex()

        coco_dt = pyco.COCO()
        coco_dt.dataset = self.predictions
        coco_dt.createIndex()

        imgIds = sorted(coco_ds.getImgIds())

        if n_imgs > 0:
            # Imported here because the file's import cell does not provide
            # numpy, which made this branch raise NameError before.
            import numpy as np

            # Sample without replacement so the subset really has n_imgs
            # distinct images, capped at the number of available images.
            subset_size = min(n_imgs, len(imgIds))
            imgIds = np.random.choice(imgIds, size=subset_size, replace=False).tolist()

        cocoEval = COCOeval(coco_ds, coco_dt, 'bbox')
        cocoEval.params.imgIds = imgIds
        cocoEval.params.useCats = True

        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()

        return cocoEval

In [None]:
# Start Training Process
evaluator = Evaluator(test_annotation_dir)

# Predictions of the current epoch are written here and read back by the evaluator
preds_path = os.path.join('/kaggle/working', 'preds.json')
# Lowest mean training loss seen so far; decides when the weights are saved
best_average_loss = float('inf')

for epoch in range(num_epochs):
    print()
    print(f'############### Epoch {epoch} ########################################')
    print()

    # Training
    model.train()
    running_loss = 0.0
    for images, targets in data_loader_train:
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # In train mode the model returns a dict of losses
        loss_dict = model(images, targets)
        sum_loss = sum(loss_dict.values())

        optimizer.zero_grad()
        sum_loss.backward()
        optimizer.step()

        # .item() detaches the value; adding the tensor itself would keep
        # every batch's autograd graph alive for the whole epoch.
        running_loss += sum_loss.item()

    # Mean loss per batch (guarded against an empty loader)
    epoch_loss = running_loss / max(len(data_loader_train), 1)

    # Evaluation: collect predictions for every image of every test batch
    model.eval()
    pred_list = []
    with torch.no_grad():
        for images, targets in data_loader_test:
            images = [image.to(device) for image in images]

            # In eval mode the model returns one prediction dict per image
            outputs = model(images)

            for output, target in zip(outputs, targets):
                img_id = target['image_id'].item()
                # Factors that map resized coordinates back to the original image
                scale_x = target['org_w'].item() / resize[1]
                scale_y = target['org_h'].item() / resize[0]

                boxes = output['boxes'].cpu().tolist()
                labels = output['labels'].cpu().tolist()
                scores = output['scores'].cpu().tolist()

                for (x1, y1, x2, y2), label, score in zip(boxes, labels, scores):
                    pred_list.append({
                        'image_id': img_id,
                        # Predicted Label
                        'category_id': label,
                        # Confidence Score
                        'score': score,
                        # Coco format: bbox = [xmin, ymin, width, height]
                        'bbox': [
                            x1 * scale_x,
                            y1 * scale_y,
                            (x2 - x1) * scale_x,
                            (y2 - y1) * scale_y,
                        ],
                    })

    # Write the file once per epoch, after all batches have been processed
    with open(preds_path, 'w') as f:
        json.dump(pred_list, f, ensure_ascii=False, indent=4)

    # If model improved, save weights
    if best_average_loss >= epoch_loss:
        best_average_loss = epoch_loss
        torch.save(
            model.state_dict(),
            'model.pth'
        )

    lr_scheduler.step()

    time_id = time.strftime('%Y_%m_%d-%H_%M')
    print()
    print(f'[{time_id}] '
          f'Loss: {epoch_loss} - LR: {str(lr_scheduler.get_last_lr()[0])} ')

    # calculating average precision with COCO
    print('Evaluation')
    evaluator.evaluate(preds_path)

print()
print('Training stopped')

In [None]:
# image, annotations = images[0], targets[0]
# pred = preds[0]
# # Convert the image tensor to NumPy and permute it for visualization
# image_np = image.permute(1, 2, 0).cpu().numpy()

# fig, ax = plt.subplots(1)
# ax.imshow(image_np)

# # Get bounding boxes and labels from annotations
# boxes = annotations['boxes'].cpu().numpy()
# labels = annotations['labels'].cpu().numpy()

# # Loop through each bounding box and draw it
# for i, box in enumerate(boxes):
#     x1, y1, x2, y2 = box
#     rect = patches.Rectangle((x1, y1), x2-x1, y2-y1, linewidth=1, edgecolor='g', facecolor='none')
#     ax.add_patch(rect)
#     plt.text(x1, y1, str(labels[i]), color='white')  # Optional: Add label text
    
# # Get bounding boxes and labels from annotations
# boxes_pred = pred['boxes'].cpu().numpy()
# labels_pred = pred['labels'].cpu().numpy()

# for i, box in enumerate(boxes_pred):
#     x1, y1, x2, y2 = box
#     rect = patches.Rectangle((x1, y1), x2-x1, y2-y1, linewidth=1, edgecolor='r', facecolor='none')
#     ax.add_patch(rect)
#     plt.text(x1, y1, str(labels_pred[i]), color='white')  # Optional: Add label text

# plt.axis('off')
# plt.show()