In [1]:
from src.core import *
from src.rois import *
from functools import partial

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from torchvision import transforms, models
from torchvision.transforms.functional import resized_crop

from fastai.vision.all import DataLoaders, OptimWrapper, Learner, Metric

In [2]:
loader = JSONLoader('data')
df, id2label, id2img = loader.load_train()

In [3]:
n = 50
sample_df = df.sample(n)
train_df, eval_df = sample_df.iloc[:int(0.8*n)], sample_df.iloc[int(0.8*n):]

In [39]:
ann_data = [
    {'id':i,'image_id':row['image_id'],'bbox':bbox,'category_id':cat_id,'iscrowd':0,'area':0}
    for i, row in eval_df.iterrows()
    for bbox, cat_id in zip(row['bbox'], row['category_id'])
]
images_data = [
    {'id':row['image_id'], 'file_name':f'{str(row['image_id']).zfill(12)}.jpg'}
    for _, row in eval_df.iterrows()
]
cat_data = [{'id':k, 'name':v} for k,v in id2label.items() if k!=0]
with open('tmp/eval_gt.json', 'w') as f:
    json.dump({'categories':cat_data,'annotations':ann_data,'images':images_data}, f, indent=4)

In [5]:
from tqdm import tqdm

tqdm.pandas()
train_res = train_df.progress_apply(get_annotated_rois, axis=1, id2img=id2img)

train_img_ids = torch.cat([row[0] for row in train_res])
train_rois = torch.cat([row[1] for row in train_res])
train_roi_ids = torch.cat([row[2] for row in train_res])
train_offsets = torch.cat([row[3] for row in train_res])
train_img_dims = torch.cat([row[4] for row in train_res])

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 40/40 [01:10<00:00,  1.77s/it]


In [6]:
eval_res = eval_df.progress_apply(get_annotated_rois, axis=1, id2img=id2img)

eval_img_ids = torch.cat([row[0] for row in eval_res])
eval_rois = torch.cat([row[1] for row in eval_res])
eval_roi_ids = torch.cat([row[2] for row in eval_res])
eval_offsets = torch.cat([row[3] for row in eval_res])
eval_img_dims = torch.cat([row[4] for row in eval_res])

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:18<00:00,  1.87s/it]


In [8]:
class RCNNDataset(Dataset):
    def __init__(self, img_ids, rois, roi_ids, offsets, img_dims, id2img, crop_size=(224,224)):
        self.img_ids, self.rois, self.roi_ids = img_ids, rois, roi_ids
        self.offsets, self.img_dims = offsets, img_dims
        self.id2img = id2img
        self.crop_size = crop_size
        self.img_tfms = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def __len__(self):
        return self.img_ids.shape[0]

    def __getitem__(self, idx):
        img_id = self.img_ids[idx]
        img = Image.open(self.id2img[img_id.item()]).convert('RGB')
        img = self.img_tfms(img)
        offset = self.offsets[idx]/self.img_dims[idx]
        x_min, y_min, w, h = self.rois[idx].int().tolist()
        crop = resized_crop(img, top=y_min, left=x_min, height=h, width=w, size=self.crop_size)
        
        return crop, img_id, self.rois[idx], self.roi_ids[idx], offset, self.img_dims[idx]

In [9]:
train_ds = RCNNDataset(train_img_ids, train_rois, train_roi_ids, train_offsets, train_img_dims, id2img)
eval_ds = RCNNDataset(eval_img_ids, eval_rois, eval_roi_ids, eval_offsets, eval_img_dims, id2img)

In [10]:
train_dl = DataLoader(train_ds, batch_size=32, shuffle=True, pin_memory=True)
eval_dl = DataLoader(eval_ds, batch_size=32, shuffle=False, pin_memory=True)

dls = DataLoaders(train_dl, eval_dl)
dls.n_inp = 1

In [11]:
vgg16 = models.vgg16(weights=models.VGG16_Weights.DEFAULT)

vgg16.classifier[0].in_features

25088

In [12]:
class RCNN(nn.Module):
    def __init__(self, n_classes):
        super().__init__()
        self.img_encoder = models.vgg16(weights=models.VGG16_Weights.DEFAULT)
        for param in self.img_encoder.parameters():
            param.requires_grad = False
        self.img_encoder.eval()
        encode_dim = self.img_encoder.classifier[0].in_features
        self.img_encoder.classifier = nn.Sequential()

        self.cls_head = nn.Linear(encode_dim, n_classes)
        self.reg_head = nn.Sequential(
            nn.Linear(encode_dim, 512), nn.ReLU(),
            nn.Linear(512, 4), nn.Tanh(),
        )

    def forward(self, crops):
        features = self.img_encoder(crops)
        probs = self.cls_head(features)
        bbox = self.reg_head(features)
        return probs, bbox

    def calc_loss(self, preds, *targs, beta=0.2):
        probs, pred_offsets = preds
        _, _, ids, offsets, _ = targs
        cls_loss = nn.CrossEntropyLoss()(probs, ids)
        reg_loss = torch.tensor(0.0, requires_grad=True)
        mask = ids!=0
        if torch.sum(mask)>0:
            reg_loss = nn.MSELoss()(pred_offsets[mask], offsets[mask])
            
        return beta*cls_loss + (1-beta)*reg_loss

In [46]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

class mAP(Metric):
    def __init__(self, gt_path, pred_path):
        self.gt_path, self.pred_path = gt_path, pred_path
        self.reset()
        
    def reset(self):
        with open(self.pred_path, "w") as f:
            json.dump([], f, indent=4)

    def accumulate(self, learn):
        probs, pred_offsets = learn.pred
        scores, pred_ids = probs.max(dim=1)
        img_ids, rois, ids, _, img_dims  = learn.y
        mask = ids!=0
        pred_offsets = pred_offsets*img_dims
        pred_bbs = rois+pred_offsets
        
        self.write_to_file(img_ids[mask], pred_ids[mask], pred_bbs[mask], scores[mask])

    def write_to_file(self, img_ids, ids, pred_bbs, scores):
        iterables = [img_ids.tolist(), ids.tolist(), pred_bbs.tolist(), scores.tolist()]
        new_anns = [
            {'image_id':img_id, 'category_id':id, 'bbox':bbox, 'score':score}
            for img_id, id, bbox, score in zip(*iterables)
        ]
        with open(self.pred_path, 'r') as f:
            data = json.load(f)
        data += new_anns
        with open(self.pred_path, 'w') as f:
            json.dump(data, f, indent=4)
            
    @property
    def value(self):
        coco_gt = COCO(self.gt_path)
        coco_pred = coco_gt.loadRes(self.pred_path)
        cocoEval = COCOeval(coco_gt, coco_pred, 'bbox')
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        return cocoEval.stats[0]

In [47]:
model = RCNN(len(id2label))
opt_func = partial(OptimWrapper, opt=torch.optim.Adam)
mAP_metric = mAP('tmp/eval_gt.json', 'tmp/eval_preds.json')

learn = Learner(dls, model, loss_func=model.calc_loss, opt_func=opt_func, metrics=mAP_metric)

In [48]:
learn.fit_one_cycle(n_epoch=1, lr_max=1e-4)

epoch,train_loss,valid_loss,m_ap,time
0,0.136659,0.16065,0.0,08:16


loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDe