In [None]:
# Install prerequisites

# Install PyCocoTools needed for FasterRCNN
# The package is built from the `PythonAPI` subdirectory of the
# `cocodataset-master` branch of a GitHub fork of cocoapi (no version pin).
# NOTE(review): presumably this is what the copied `coco_eval` / `coco_utils`
# helpers import for the validation metrics — confirm.
!pip install git+https://github.com/gautamchitnis/cocoapi.git@cocodataset-master#subdirectory=PythonAPI

# Copy useful functions from pytorch vision tools
# Every file matching `*.*` in the detection reference directory is copied into
# the working directory so it can be imported as a plain module.
# NOTE(review): presumably the source of the `utils`, `engine`, `coco_utils`
# and `coco_eval` modules imported later in this notebook — confirm.
%cp ../input/pytorch-vision-tools/references/detection/*.* .

In [None]:
# Import required libraries
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, Subset
from PIL import Image
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from os import path
import torch
import torchvision.utils
import utils
from engine import train_one_epoch, evaluate
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.optim import Adam
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import pickle
import time
import matplotlib.pyplot as plt
import tqdm
import torch.nn.functional as F

In [None]:
%matplotlib inline

In [None]:
# Define a dataset class which defines how to load images,targets for training and validation
class VinBigDataset(Dataset):
    """Detection dataset yielding one ``(image, target)`` pair per unique image id.

    Args:
        img_dir: Directory holding one ``<image_id>.png`` file per image.
        df: Annotation table with one row per box and the columns ``image_id``,
            ``x_min``, ``y_min``, ``x_max``, ``y_max`` and ``class_id``.
        transforms: Callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` that returns a
            dict with the same keys, or ``None`` to skip augmentation.
    """

    def __init__(self, img_dir, df, transforms):
        self.img_dir = img_dir
        self.df = df
        # One dataset item per distinct image, not per annotation row
        self.imgs = df["image_id"].unique()
        self.transforms = transforms

    def __len__(self):
        """Return the number of distinct images."""
        return len(self.imgs)

    @staticmethod
    def _build_target(boxes, labels, idx):
        """Assemble the target dict from boxes (N x 4, x_min/y_min/x_max/y_max) and labels (N)."""
        # reshape(-1, 4) keeps an empty annotation list as a (0, 4) tensor
        # instead of collapsing it to shape (0,)
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64).reshape(-1)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = torch.tensor([idx])
        # Box area = height * width
        target["area"] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # No annotation is treated as a crowd region
        target["iscrowd"] = torch.zeros((len(boxes),), dtype=torch.int64)
        return target

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the ``idx``-th unique image id.

        ``target`` holds ``boxes``, ``labels``, ``image_id``, ``area`` and
        ``iscrowd``. When ``transforms`` is ``None`` the image is returned as
        an RGB PIL image; otherwise it is whatever the transform pipeline
        returns under ``'image'``.
        """
        # Get the id of the current image
        img_id = self.imgs[idx]

        # Get the rows containing annotations for this image
        data_rows = self.df[self.df["image_id"] == img_id]
        target = self._build_target(
            data_rows[["x_min", "y_min", "x_max", "y_max"]].values,
            data_rows["class_id"].values,
            idx,
        )

        # Load the image; the context manager closes the underlying file
        # once the RGB copy has been made
        img_path = path.join(self.img_dir, f"{img_id}.png")
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")

        if self.transforms is not None:
            transformed = self.transforms(
                image=np.array(img),
                bboxes=target["boxes"],
                labels=target["labels"],
            )
            img = transformed['image']

            # The pipeline may move, resize or drop boxes, so every
            # box-derived field is rebuilt from its output to keep boxes,
            # labels, area and iscrowd aligned.
            kept_boxes = [[float(v) for v in box[:4]] for box in transformed['bboxes']]
            kept_labels = [int(label) for label in transformed['labels']]
            target = self._build_target(kept_boxes, kept_labels, idx)

        return img, target

In [None]:
def create_augmentations(train):
    """Build the albumentations pipeline for one dataset split.

    Args:
        train: Truthy for the training pipeline (bbox-safe random crop to
            800x800, horizontal flip, shift/scale/rotate and
            brightness/contrast jitter ahead of the shared steps); falsy for
            the validation pipeline (shared steps only).

    Returns:
        An ``A.Compose`` whose boxes are in ``pascal_voc`` format with the
        class ids carried in the ``labels`` field.
    """
    bbox_settings = {'format': 'pascal_voc', 'label_fields': ['labels']}

    # Random augmentations are applied to the training split only
    steps = []
    if train:
        steps.extend([
            A.RandomSizedBBoxSafeCrop(height=800, width=800, erosion_rate=0.2),
            A.HorizontalFlip(p=0.5),
            A.ShiftScaleRotate(p=0.2, rotate_limit=15),
            A.RandomBrightnessContrast(p=0.4),
        ])

    # Steps shared by both splits: normalisation with zero mean / unit std and
    # max_pixel_value=255, then conversion to a torch tensor
    steps.extend([
        A.Normalize(mean=(0, 0, 0), std=(1, 1, 1), max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ])

    return A.Compose(steps, bbox_params=bbox_settings)

In [None]:
def weighted_fastrcnn_loss(class_logits, box_regression, labels, regression_targets,
                           class_weights=None):
    # type: (Tensor, Tensor, List[Tensor], List[Tensor], Optional[Sequence[float]]) -> Tuple[Tensor, Tensor]
    """
    Computes the Faster R-CNN box-head loss with a class-weighted classification term.

    Arguments:
        class_logits (Tensor): per-proposal class scores, shape (N, num_classes)
        box_regression (Tensor): per-proposal, per-class box deltas; reshaped
            here to (N, -1, 4)
        labels (list[Tensor]): per-image class labels of the sampled proposals;
            0 is treated as background
        regression_targets (list[Tensor]): per-image regression targets of the
            sampled proposals
        class_weights (sequence of float, optional): one weight per class for
            the cross-entropy term. When omitted, the notebook-level ``vals``
            array is used, which is what happens when torchvision calls this
            function with its four positional arguments.

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    # Everything is placed on the logits' own device so the function does not
    # depend on a notebook-level `device` variable being defined first.
    target_device = class_logits.device

    labels = torch.cat(labels, dim=0).to(target_device)
    regression_targets = torch.cat(regression_targets, dim=0)

    if class_weights is None:
        class_weights = vals
    weight = torch.as_tensor(class_weights, dtype=class_logits.dtype, device=target_device)

    classification_loss = F.cross_entropy(class_logits, labels, weight=weight)

    # get indices that correspond to the regression targets for
    # the corresponding ground truth labels, to be used with
    # advanced indexing
    sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
    labels_pos = labels[sampled_pos_inds_subset]
    N = class_logits.shape[0]
    box_regression = box_regression.reshape(N, -1, 4)

    # Only foreground proposals contribute, each through the deltas predicted
    # for its own ground-truth class
    box_loss = F.smooth_l1_loss(
        box_regression[sampled_pos_inds_subset, labels_pos],
        regression_targets[sampled_pos_inds_subset],
        reduction="sum",
    )
    # Normalised by the number of all sampled proposals, background included
    box_loss = box_loss / labels.numel()

    return classification_loss, box_loss

In [None]:
# Function to create an instance of the model
def create_model(num_classes=15):
    """Create a COCO-pretrained Faster R-CNN (ResNet-50 FPN) with a fresh box head.

    Args:
        num_classes: Number of outputs of the new box predictor. The default
            of 15 matches this notebook's labels: the abnormality class ids
            are shifted up by one so that id 0 is left for background.

    Returns:
        The model, with ``roi_heads.box_predictor`` replaced by a new
        ``FastRCNNPredictor``.

    Side effects:
        Replaces ``torchvision.models.detection.roi_heads.fastrcnn_loss`` with
        ``weighted_fastrcnn_loss``. This is a module-level patch, so it stays
        in place for the rest of the kernel session. The ``roi_heads`` module
        is also printed.
    """
    # Swap in the class-weighted loss at module level
    torchvision.models.detection.roi_heads.fastrcnn_loss = weighted_fastrcnn_loss

    # Use resnet50 pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # Fetch the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    # Replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    print(model.roi_heads)

    return model

In [None]:
# Load the training data csv file
data_dir = "../input/vinbig1024stratified/"
csv_dir = "../input/vinbig1024stratified-anns-removed"

df_train = pd.read_csv(f"{data_dir}/train.csv")
df_val_1 = pd.read_csv(f"{data_dir}/validation.csv")
#df_val_2 = pd.read_csv(f"{csv_dir}/val_average.csv")

# Remove images that do not contain any anomalies (rows with class_id 14).
# .copy() makes each filtered frame independent of the frame it was sliced
# from, so assigning to its columns later does not trigger pandas'
# SettingWithCopyWarning.
df_train = df_train[df_train["class_id"] != 14].copy()
df_val_1 = df_val_1[df_val_1["class_id"] != 14].copy()
#df_val_2 = df_val_2[df_val_2["class_id"]!=14]

# Print out the total number of images and the total number of annotations
print(f"{df_train['image_id'].nunique()} images")
print(f"{len(df_train.index)} annotations")
print()
print(f"{df_val_1['image_id'].nunique()} images")
print(f"{len(df_val_1.index)} annotations")

# print(f"{df_val_2['image_id'].nunique()} images")
# print(f"{len(df_val_2.index)} annotations")

In [None]:
# Shift every class id up by one so that id 0 is left for the background class.
# NOTE: this cell is not idempotent; running it again shifts the ids a second time.
df_train["class_id"] = df_train["class_id"] + 1
df_val_1["class_id"] = df_val_1["class_id"] + 1
#df_val_2["class_id"] = df_val_2["class_id"] + 1

In [None]:
# Show the number of training annotations per class id, ordered by class id
df_train["class_id"].value_counts().sort_index()

In [None]:
# Per-class loss weights proportional to the inverse annotation count,
# rescaled so that the weights add up to 14, then ordered by class id.
class_counts = df_train["class_id"].value_counts()
inverse_counts = 1 / class_counts
loss_weighting = (14 * (inverse_counts / sum(inverse_counts))).sort_index()

In [None]:
# Plain array of the per-class weights, in the (class-id sorted) order of `loss_weighting`
vals = loss_weighting.values

In [None]:
# Prepend a weight of 1 for class 0 (background) so the array lines up with
# the detector's class indices. It is built from `loss_weighting` rather than
# from `vals` itself, so re-running this cell does not prepend a second entry.
vals = np.append([1], loss_weighting.values)

In [None]:
# Display the weight array that the weighted classification loss reads
vals

In [None]:
# Build the training and validation datasets, each with its own
# augmentation pipeline
imgs_folder = "../input/vinbig1024stratified/output/dataset"
train_img_dir = path.join(imgs_folder, 'train')
val_img_dir = path.join(imgs_folder, 'validation')
# train_img_dir = path.join(data_dir, 'train/train')
train_dataset = VinBigDataset(
    train_img_dir, df_train, create_augmentations(train=True)
)
val_dataset_1 = VinBigDataset(
    val_img_dir, df_val_1, create_augmentations(train=False)
)
#val_dataset_2 = VinBigDataset(val_img_dir, df_val_2, create_augmentations(train=False))

# Wrap the datasets in data loaders. The collate function decides how
# individual items are combined into a minibatch; here it comes from the
# copied `utils` module.
data_loader_train = DataLoader(
    train_dataset,
    batch_size=5,
    shuffle=True,
    num_workers=4,
    collate_fn=utils.collate_fn,
)

data_loader_val_1 = DataLoader(
    val_dataset_1,
    batch_size=1,
    shuffle=False,
    num_workers=4,
    collate_fn=utils.collate_fn,
)

#data_loader_val_2 = DataLoader(
#    val_dataset_2, batch_size=1, shuffle=False, num_workers=4, collate_fn=utils.collate_fn)

# Report how many images each split contains
print(f"{len(train_dataset)} items in the training set")
print(f"{len(val_dataset_1)} items in the validation set 1")
#print(f"{len(val_dataset_1)} items in the validation set 2")

In [None]:
import math
import sys
import time
import torch

import torchvision.models.detection.mask_rcnn

from coco_utils import get_coco_api_from_dataset
from coco_eval import CocoEvaluator
import utils


def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    """Train ``model`` for one pass over ``data_loader``.

    This definition replaces the ``train_one_epoch`` imported from ``engine``
    earlier in the notebook. Unlike a plain training loop, a batch for which
    the model raises ``ValueError`` is skipped, not fatal.

    Args:
        model: Detection model that returns a dict of losses when called with
            ``(images, targets)`` in training mode.
        optimizer: Optimizer stepped once per processed batch.
        data_loader: Iterable of ``(images, targets)`` batches.
        device: Device the images and target tensors are moved to.
        epoch: Epoch index; a warm-up LR schedule is used only when it is 0.
        print_freq: Logging interval passed to ``MetricLogger.log_every``.

    Returns:
        The ``utils.MetricLogger`` holding the loss and learning-rate meters.

    Note:
        The process is terminated via ``sys.exit(1)`` if the loss becomes
        non-finite.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    # Warm the learning rate up over the first iterations of the first epoch
    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    skipped_batches = 0
    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Best effort: a batch the model rejects with ValueError is skipped,
        # but no longer silently. The first error is shown and the number of
        # skipped batches is reported at the end of the epoch.
        # NOTE(review): presumably raised by the model's target validation
        # (e.g. a sample left without valid boxes); confirm.
        try:
            loss_dict = model(images, targets)
        except ValueError as err:
            skipped_batches += 1
            if skipped_batches == 1:
                print("Skipping batch after ValueError: {}".format(err))
            continue

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    if skipped_batches:
        print("Epoch [{}]: skipped {} batch(es) due to ValueError".format(epoch, skipped_batches))

    return metric_logger

In [None]:
model = create_model()
# Train the model
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model.to(device)

# Hyperparameters
learning_rate = 0.001
weight_decay = 0
num_epochs = 25

params = [p for p in model.parameters() if p.requires_grad]

optimizer = Adam(params, lr=learning_rate, weight_decay=weight_decay)
lr_scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=0.00002)

# Create a folder for saving the model weights
%mkdir model_weights
%mkdir stats

train_meters = []
coco_evals_1 = []
#coco_evals_2 = []
t_start = time.time()
best_ap_50 = 0
for epoch in range(num_epochs):
    # Train over the epoch
    logger = train_one_epoch(model, optimizer, data_loader_train, device, epoch, print_freq=100)
    train_meters.append(logger.meters)
    
    
    # Evaluate on the original validation set
    coco_eval_1 = evaluate(model, data_loader_val_1, device)
    coco_evals_1.append(coco_eval_1.coco_eval)
    
    
#     print()
#     print()
#     print("VALIDATION ON MODIFIED DATASET")
#     # Evaluate on the new validation set
#     coco_eval_2 = evaluate(model, data_loader_val_2, device)
#     coco_evals_2.append(coco_eval_2.coco_eval)

    # Update the learning rate
    lr_scheduler.step()
    
    if coco_eval_1.coco_eval['bbox'].stats[2] > best_ap_50:
        best_ap_50 = coco_eval_1.coco_eval['bbox'].stats[2]
        
        # Save the model weights
        torch.save(model.state_dict(), f"model_weights/model_weights.bin")

    # Save the stats
    pickle.dump(train_meters, open(f"stats/train_stats.pkl", 'wb'))
    pickle.dump(coco_evals_1, open(f"stats/val_stats_1.pkl", 'wb'))
    #pickle.dump(coco_evals_2, open(f"stats/val_stats_2.pkl", 'wb'))


t_end = time.time()
print(f"Training took {t_end - t_start}")
      
