In [None]:
%%bash
# Fetch the TorchVision detection reference helpers (tag v0.3.0) and copy
# them next to the notebook so later cells can import them.

# Abort on the first failing command; otherwise a failed clone/checkout would
# still run the copies below against the wrong directory.
set -euo pipefail

# Skip the clone when the checkout already exists so the cell can be re-run.
if [ ! -d vision/.git ]; then
    git clone https://github.com/pytorch/vision.git
fi
git -C vision checkout v0.3.0

# Copy the helper modules from references/detection into the working directory.
for helper in utils transforms coco_eval engine coco_utils; do
    cp "vision/references/detection/${helper}.py" .
done

In [None]:
# Install the Python package found in the PythonAPI subdirectory of a cocoapi
# fork on GitHub (branch/ref "cocodataset-master").
# NOTE(review): presumably this provides the COCO tooling that the copied
# coco_eval.py / coco_utils.py helpers import — confirm. The install is not
# pinned to a commit, so results may change if the fork's branch moves.
!pip install git+https://github.com/gautamchitnis/cocoapi.git@cocodataset-master#subdirectory=PythonAPI

In [None]:
import os
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch
from sklearn.model_selection import  StratifiedShuffleSplit
from skimage import io, transform
from engine import train_one_epoch, evaluate
import utils
import transforms as T
import torchvision

In [None]:
# Root folder of the VinBigData 512px image dataset.
dirpath = '../input/vinbigdata-512-image-dataset/vinbigdata'

# Read the annotation table and keep only the rows whose class_id is not 14.
# NOTE(review): class 14 is presumably the "no finding" label — confirm
# against the dataset description.
df = pd.read_csv(f'{dirpath}/train.csv').loc[lambda frame: frame["class_id"] != 14]

In [None]:
# Hold out 20% of the images for validation, stratified on class_id, with a
# fixed seed so the split is reproducible.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
print(df.shape)

# Split over images rather than boxes: keep one row per image_id.
# NOTE: drop_duplicates keeps only the first annotation of every image, so any
# further boxes of that image are not part of train_df / test_df.
df_dd = df.drop_duplicates('image_id')
print(df_dd.shape)
df_dd = df_dd.reset_index()

# n_splits=1, so the splitter yields exactly one (train, test) pair of
# positional indices; take it directly instead of looping over it.
train_index, test_index = next(sss.split(df_dd['image_id'], df_dd['class_id']))
train_df = df_dd.iloc[train_index]
test_df = df_dd.iloc[test_index]

In [None]:
import os
import numpy as np
import torch
from PIL import Image


class VinbigDataset(Dataset):
    """Detection dataset yielding one image and one bounding box per dataframe row.

    Each row of ``df`` must provide ``image_id``, ``class_id`` and the box
    columns ``x_min``, ``y_min``, ``x_max``, ``y_max``. The image is read from
    ``<root>/<img_folder>/<image_id>.png``.

    Args:
        img_folder: Sub-folder of ``root`` that contains the PNG images.
        df: Dataframe with one annotation per row (indexed positionally).
        transforms: Optional callable ``(img, target) -> (img, target)``
            applied after the image has been converted to a tensor.
        root: Dataset root directory. Defaults to the path that was
            previously hard-coded, so existing callers are unaffected.
    """

    # Column order of a box: [x_min, y_min, x_max, y_max].
    BOX_COLUMNS = ("x_min", "y_min", "x_max", "y_max")

    def __init__(self, img_folder, df, transforms,
                 root='../input/vinbigdata-512-image-dataset/vinbigdata'):
        # directory (relative to root) containing all the images
        self.img_folder = img_folder
        # dataframe containing all the image data
        self.df = df
        self.transforms = transforms
        self.root = root

    def __len__(self):
        """Return the number of rows (samples) in the dataframe."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(img, target)`` for the row at position ``idx``.

        ``target`` holds ``boxes`` (1, 4) float32, ``labels`` (1,) int64,
        ``image_id`` (1,) int64, ``area`` (1,) float32 and ``iscrowd`` (1,)
        int64, so every per-object entry has the same length.
        """
        row = self.df.iloc[idx]

        img_path = os.path.join(self.root, self.img_folder, row["image_id"] + ".png")
        # Use a context manager so the underlying file handle is released.
        with Image.open(img_path) as raw:
            img = raw.convert("RGB")
        img = torchvision.transforms.ToTensor()(img)

        # One box per row. The box used to be replicated three times, which
        # left boxes (3), labels (1) and iscrowd (4) with different lengths.
        boxes = torch.tensor(
            [[float(row[col]) for col in self.BOX_COLUMNS]],
            dtype=torch.float32,
        )
        num_objs = boxes.shape[0]
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {
            "boxes": boxes,
            # Shift class ids by one so that label 0 is never emitted.
            "labels": torch.tensor([int(row["class_id"]) + 1] * num_objs,
                                   dtype=torch.int64),
            # Integer id (was a float tensor) so it can be used as a key.
            "image_id": torch.tensor([idx], dtype=torch.int64),
            "area": area,
            "iscrowd": torch.zeros((num_objs,), dtype=torch.int64),
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return (img, target)

In [None]:
# Both splits read their images from the "train" folder and differ only in the
# dataframe rows they cover; no extra transforms are applied.
train_vinbig = VinbigDataset(
    img_folder="train",
    df=train_df,
    transforms=None,
)
val_vinbig = VinbigDataset(
    img_folder="train",
    df=test_df,
    transforms=None,
)

In [None]:
# utils.collate_fn is used for both loaders to assemble the (image, target)
# samples of a batch.
train_dataloader = DataLoader(
    train_vinbig,
    batch_size=2,
    shuffle=True,
    collate_fn=utils.collate_fn,
)
# Validation is not shuffled: the order has no benefit for evaluation and a
# fixed order keeps evaluation logs reproducible between runs.
val_dataloader = DataLoader(
    val_vinbig,
    batch_size=1,
    shuffle=False,
    collate_fn=utils.collate_fn,
)

In [None]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Load a Faster R-CNN (ResNet-50 FPN) model pre-trained on COCO.
# (A second, never-used copy of the pre-trained model was loaded here before;
# it only cost download time and memory, so it has been dropped.)
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# Number of outputs of the new classification head.
# The dataset emits labels as class_id + 1, leaving label 0 free for the
# background class, so the largest label is max(class_id) + 1 and the head
# needs max(class_id) + 2 outputs. The previous value of 14 was too small for
# the highest label.
num_classes = int(df["class_id"].max()) + 2
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one sized for this dataset
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)


In [None]:
# Smoke test: pull a single batch from the training loader and run it through
# the model together with its targets.
images, targets = next(iter(train_dataloader))
images = list(images)
targets = [dict(t) for t in targets]
# NOTE(review): with targets supplied and the model in training mode,
# torchvision detection models are documented to return a dict of losses
# (detections are returned in eval mode) — confirm.
output = model(images, targets)

In [None]:
# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# (The stale "background and person" comment and the redundant reassignment of
# num_classes were removed; the value set when the model head was built stays
# in effect.)

# move model to the right device
model.to(device)

# construct an optimizer over the parameters that require gradients
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.001,
                            momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

In [None]:
# Train for 10 epochs, evaluating on the validation split after each one.
num_epochs = 10

for epoch in range(num_epochs):
    # Train for one epoch, logging every 200 iterations. This call and the
    # scheduler step were commented out, so the loop only re-evaluated the
    # same untrained weights on every pass.
    train_one_epoch(model, optimizer, train_dataloader, device, epoch, print_freq=200)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the validation split
    evaluate(model, val_dataloader, device=device)