In [142]:
from collections import namedtuple
from collections import OrderedDict
import torch
import torch.nn.functional as F
from torch import nn
from torchvision.models import resnet50
import torch
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import VOCDetection
import torch.optim as optim
import PIL.Image as Image
import PIL.ImageColor as ImageColor
import PIL.ImageDraw as ImageDraw
import PIL.ImageFont as ImageFont
import imageio
import math
from torchvision.transforms import Resize
import matplotlib.pyplot as plt
import numpy as np
from easydict import EasyDict as edict
import tensorflow as tf
from IPython import display
import random

In [133]:
# Load the pretrained ResNet-50 that the following cells slice into a backbone.
# The constructor is re-imported under a private alias because this cell rebinds
# the name `resnet50` to the model instance; without the alias, running the cell
# a second time would try to call the model object instead of the constructor.
from torchvision.models import resnet50 as _build_resnet50
resnet50=_build_resnet50(pretrained=True)

In [134]:
# Keep every top-level child of the network except the last two and chain the
# rest in order (presumably this drops the pooling and classifier head; verify
# against the torchvision model definition).
resnet=nn.Sequential(*list(resnet50.children())[:-2])

In [135]:
# Replace the first module of the downsample branch in resnet[4][0] with a
# freshly initialised 1x1, stride-1, bias-free convolution (64 -> 256 channels).
# NOTE(review): any pretrained weights held by the replaced module are discarded.
resnet[4][0].downsample[0]=nn.Conv2d(64,256,kernel_size=1,stride=1,bias=False)

In [136]:
# Replace conv2 of resnet[5][0] with a freshly initialised 3x3, stride-1,
# padding-1, bias-free convolution (128 -> 128 channels).
# NOTE(review): the block's downsample branch is not touched here; confirm its
# stride still matches this stride-1 conv so the residual addition lines up.
resnet[5][0].conv2=nn.Conv2d(128,128,kernel_size=3,stride=1,padding=1,bias=False)

In [137]:
# Replace conv2 of resnet[6][0] with a freshly initialised 3x3, stride-1,
# padding-1, bias-free convolution (256 -> 256 channels).
# NOTE(review): the block's downsample branch is not touched here; confirm its
# stride still matches this stride-1 conv so the residual addition lines up.
resnet[6][0].conv2=nn.Conv2d(256,256,kernel_size=3,stride=1,padding=1,bias=False)

In [138]:
# Replace conv2 of resnet[7][0] with a freshly initialised 3x3, stride-2,
# padding-1, bias-free convolution (512 -> 512 channels).
# NOTE(review): any pretrained weights held by the replaced module are discarded.
resnet[7][0].conv2=nn.Conv2d(512,512,kernel_size=3,stride=2,padding=1,bias=False)

In [90]:
class make_layers(nn.Module):
    """Builder helpers for convolution blocks (conv, optional GroupNorm, optional ReLU).

    The class registers no parameters and defines no ``forward``; it only
    groups the factory methods below.
    """

    def __init__(self):
        super(make_layers, self).__init__()

    def get_group_gn(self, dim, dim_per_gp, num_group):
        """Return the group count for a GroupNorm over ``dim`` channels.

        Args:
            dim: number of channels to normalise.
            dim_per_gp: channels per group; used only when positive.
            num_group: group count returned when ``dim_per_gp`` is not positive.

        Returns:
            ``dim // dim_per_gp`` if ``dim_per_gp > 0``, otherwise ``num_group``.
        """
        if dim_per_gp > 0:
            return dim // dim_per_gp
        return num_group

    def group_norm(self, out_channel, affine=True, divisor=1):
        """Build an ``nn.GroupNorm`` for ``out_channel // divisor`` channels.

        Args:
            out_channel: channel count before division by ``divisor``.
            affine: forwarded to ``nn.GroupNorm``.
            divisor: integer by which the channel and group counts are divided.

        Returns:
            An ``nn.GroupNorm`` with ``eps=1e-5``.
        """
        out_channel = out_channel // divisor
        # -1 // divisor stays negative for any positive divisor, so
        # get_group_gn falls through to ``num_group`` below.
        dim_per_gp = -1 // divisor
        num_group = 32 // divisor
        eps = 1e-5

        return nn.GroupNorm(
            self.get_group_gn(out_channel, dim_per_gp, num_group),
            out_channel,
            eps,
            affine,
        )

    def conv_with_kaiming_uniform(self, use_gn=False, use_relu=False):
        """Return a factory that builds an initialised convolution block.

        Args:
            use_gn: append a GroupNorm after the conv and drop the conv bias.
            use_relu: append an in-place ReLU as the last layer.

        Returns:
            A callable ``make_conv(in_channel, out_channel, kernel_size,
            stride=1, dilation=1)``. It returns a bare ``nn.Conv2d`` when
            neither option is set, otherwise an ``nn.Sequential``.
        """

        def make_conv(in_channel, out_channel, kernel_size, stride=1, dilation=1):
            conv = nn.Conv2d(
                in_channel,
                out_channel,
                kernel_size,
                stride,
                # The padding formula already accounts for dilation, so the
                # dilation must reach the conv too or the output size drifts.
                padding=dilation * (kernel_size - 1) // 2,
                dilation=dilation,
                bias=not use_gn,
            )

            # NOTE(review): the method name says "uniform" but the weights are
            # drawn with kaiming_normal_; kept as-is, confirm which is intended.
            nn.init.kaiming_normal_(conv.weight, a=1)
            if not use_gn:
                nn.init.constant_(conv.bias, 0)

            module = [conv]
            if use_gn:
                module.append(self.group_norm(out_channel))
            if use_relu:
                module.append(nn.ReLU(inplace=True))

            if len(module) > 1:
                return nn.Sequential(*module)
            return conv

        # Hand the factory back to the caller; without this the method
        # returned None and every call site failed.
        return make_conv
    
                

In [97]:
class lastlevelp6p7(nn.Module):
    """Two stacked stride-2 3x3 convolutions producing the extra P6 and P7 maps."""

    def __init__(self,in_channels,out_channels):
        """Create the P6 and P7 convolutions and initialise them.

        Args:
            in_channels: channel count of the tensor passed to ``forward``.
            out_channels: channel count of both P6 and P7.
        """
        super().__init__()

        self.p6=nn.Conv2d(in_channels,out_channels,kernel_size=3,stride=2,padding=1)
        self.p7=nn.Conv2d(out_channels,out_channels,kernel_size=3,stride=2,padding=1)

        # Same initialisation for both layers: Kaiming-normal weights, zero bias.
        for conv in (self.p6,self.p7):
            nn.init.kaiming_normal_(conv.weight,a=1)
            nn.init.constant_(conv.bias,0)

        # Stored for callers; forward() itself never reads this flag.
        self.use_p5=(in_channels==out_channels)

    def forward(self,p5):
        """Return ``[p6, p7]``; P7 is computed from the ReLU of P6."""
        p6=self.p6(p5)
        p7=self.p7(F.relu(p6))
        return [p6,p7]

In [99]:
class FPN(nn.Module):
    """Feature pyramid built top-down from a list of backbone feature maps.

    For every non-zero entry of ``in_channels_list`` a 1x1 "inner" conv and a
    3x3 "layer" conv are registered as ``fpn_inner{idx}`` / ``fpn_layer{idx}``
    (``idx`` starts at 1). Entries equal to 0 are skipped, so the matching
    feature map is never consumed by ``forward``.
    """

    def __init__(self, in_channels_list, out_channels, top_blocks=None):
        """
        Args:
            in_channels_list: channel count of each input feature map, ordered
                from the highest-resolution map to the lowest; 0 skips a level.
            out_channels: channel count of every pyramid output.
            top_blocks: optional module applied to the coarsest pyramid output;
                the sequence it returns is appended to the results.
        """
        super(FPN, self).__init__()

        self.inner_blocks = []
        self.layer_blocks = []

        for idx, in_channels in enumerate(in_channels_list, 1):
            inner_block = "fpn_inner{}".format(idx)
            layer_block = "fpn_layer{}".format(idx)

            if in_channels == 0:
                continue

            inner_block_module = make_layers().conv_with_kaiming_uniform()(in_channels, out_channels, 1)
            layer_block_module = make_layers().conv_with_kaiming_uniform()(out_channels, out_channels, 3, 1)

            self.add_module(inner_block, inner_block_module)
            self.add_module(layer_block, layer_block_module)

            self.inner_blocks.append(inner_block)
            self.layer_blocks.append(layer_block)

        self.top_blocks = top_blocks

    def forward(self, x):
        """Compute the pyramid.

        Args:
            x: sequence of feature maps ordered like ``in_channels_list``.

        Returns:
            Tuple of pyramid maps from highest resolution to lowest, followed
            by whatever ``top_blocks`` returns when it is set.
        """
        last_inner = getattr(self, self.inner_blocks[-1])(x[-1])
        results = [getattr(self, self.layer_blocks[-1])(last_inner)]

        # Walk the remaining levels from coarse to fine. zip stops at the
        # shortest sequence, so features whose level was skipped in __init__
        # (in_channels == 0) are ignored.
        for feature, inner_block, layer_block in zip(
            x[:-1][::-1], self.inner_blocks[:-1][::-1], self.layer_blocks[:-1][::-1]
        ):
            inner_lateral = getattr(self, inner_block)(feature)
            # Resize to the lateral map's exact size rather than a fixed 2x so
            # odd-sized feature maps still add up.
            target_size = tuple(int(s) for s in inner_lateral.shape[-2:])
            inner_top_down = F.interpolate(last_inner, size=target_size, mode="nearest")

            last_inner = inner_top_down + inner_lateral
            results.insert(0, getattr(self, layer_block)(last_inner))

        # top_blocks defaults to None, in which case there are no extra levels.
        if self.top_blocks is not None:
            results.extend(self.top_blocks(results[-1]))

        return tuple(results)

In [None]:
class backbone(nn.Module):
    """Runs a stored image through the sliced ResNet stages and an FPN."""

    def __init__(self, resnet, image):
        """
        Args:
            resnet: indexable/sliceable container of stages; items 0-3 form the
                stem and items 4-7 are used as layer1..layer4.
            image: input passed to the stem by ``get_C3_to_C5``
                (presumably a batched image tensor — confirm with the caller).
        """
        super(backbone, self).__init__()

        # Keep the input: get_C3_to_C5 reads it from the instance.
        self.image = image

        self.basestem = resnet[0:4]

        self.layer1 = resnet[4]
        self.layer2 = resnet[5]
        self.layer3 = resnet[6]
        self.layer4 = resnet[7]

    def get_C3_to_C5(self):
        """Return ``[C2, C3, C4, C5]``, the outputs of layer1..layer4 in order.

        Despite the name, the layer1 output (C2) is included as the first item.
        """
        x = self.basestem(self.image)

        C2 = self.layer1(x)
        C3 = self.layer2(C2)
        C4 = self.layer3(C3)
        C5 = self.layer4(C4)

        return [C2, C3, C4, C5]

    def build_p3_p7(self):
        """Build an FPN with a P6/P7 top block and apply it to the stage outputs.

        Returns:
            The tuple produced by ``FPN.forward`` on ``[C2, C3, C4, C5]``.

        NOTE(review): a new FPN (with freshly initialised weights) is built on
        every call and is not registered on this module — confirm that is
        intended before training through it.
        """
        body = self.get_C3_to_C5()

        out_channels = 256 * 4

        fpn = FPN(
            # The leading 0 makes the FPN skip C2.
            in_channels_list=[
                0,
                256 * 2,
                256 * 4,
                256 * 8,
            ],
            out_channels=out_channels,
            # FPN.forward feeds the top block its coarsest *output*, which has
            # out_channels channels, so the block's input width must match.
            top_blocks=lastlevelp6p7(out_channels, out_channels),
        )

        return fpn(body)
        
        

In [None]:
class BoxCoder(object):
    """Converts boxes to regression deltas relative to reference boxes and back.

    Boxes are rows of ``(x1, y1, x2, y2)``; widths and heights are computed
    with a ``+1``, i.e. both corner coordinates are treated as inclusive.
    """

    def __init__(self, weight, bbox_xform_clip=math.log(1000. / 16)):
        """
        Args:
            weight: 4-item sequence ``(wx, wy, ww, wh)`` scaling the deltas.
            bbox_xform_clip: upper bound applied to ``dw``/``dh`` in ``decode``
                before exponentiation.
        """
        self.weight = weight
        self.bbox_xform_clip = bbox_xform_clip

    def encoder(self, reference_box, proposals):
        """Encode ``reference_box`` relative to ``proposals``.

        Args:
            reference_box: ``(N, 4)`` tensor of target boxes.
            proposals: ``(N, 4)`` tensor of boxes the deltas are relative to.

        Returns:
            ``(N, 4)`` tensor of ``(dx, dy, dw, dh)`` scaled by ``self.weight``.
        """
        ex_width = proposals[:, 2] - proposals[:, 0] + 1
        ex_height = proposals[:, 3] - proposals[:, 1] + 1
        ex_ctr_x = proposals[:, 0] + 0.5 * ex_width
        ex_ctr_y = proposals[:, 1] + 0.5 * ex_height

        # Target statistics come from the reference boxes, not the proposals;
        # otherwise every delta is zero.
        gt_width = reference_box[:, 2] - reference_box[:, 0] + 1
        gt_height = reference_box[:, 3] - reference_box[:, 1] + 1
        gt_ctr_x = reference_box[:, 0] + 0.5 * gt_width
        gt_ctr_y = reference_box[:, 1] + 0.5 * gt_height

        wx, wy, ww, wh = self.weight

        target_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_width
        target_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_height
        target_dw = ww * torch.log(gt_width / ex_width)
        target_dh = wh * torch.log(gt_height / ex_height)

        return torch.stack((target_dx, target_dy, target_dw, target_dh), dim=1)

    def decode(self, rel_codes, box):
        """Apply deltas to boxes; the inverse of ``encoder``.

        Args:
            rel_codes: ``(N, 4*k)`` tensor of deltas laid out as repeated
                ``(dx, dy, dw, dh)`` groups (``k == 1`` for plain ``(N, 4)``).
            box: ``(N, 4)`` tensor of boxes the deltas are relative to.

        Returns:
            Tensor shaped like ``rel_codes`` holding the decoded
            ``(x1, y1, x2, y2)`` boxes.
        """
        box = box.to(rel_codes.dtype)

        width = box[:, 2] - box[:, 0] + 1
        height = box[:, 3] - box[:, 1] + 1
        ctr_x = box[:, 0] + 0.5 * width
        ctr_y = box[:, 1] + 0.5 * height

        wx, wy, ww, wh = self.weight

        # Strided slices keep a trailing dimension so the [:, None] broadcasts
        # below stay (N, k) instead of expanding to (N, N).
        dx = rel_codes[:, 0::4] / wx
        dy = rel_codes[:, 1::4] / wy
        dw = rel_codes[:, 2::4] / ww
        dh = rel_codes[:, 3::4] / wh

        # Bound the log-scale deltas so exp() cannot overflow.
        dw = torch.clamp(dw, max=self.bbox_xform_clip)
        dh = torch.clamp(dh, max=self.bbox_xform_clip)

        pred_ctr_x = dx * width[:, None] + ctr_x[:, None]
        pred_ctr_y = dy * height[:, None] + ctr_y[:, None]
        pred_w = torch.exp(dw) * width[:, None]
        pred_h = torch.exp(dh) * height[:, None]

        pred_boxes = torch.zeros_like(rel_codes)

        pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
        pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
        # -1 converts back to inclusive far-corner coordinates.
        pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1
        pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1

        return pred_boxes
    
    
    
    

In [147]:
class RPN_head(nn.Module):
    """Shared 3x3 conv followed by 1x1 heads for per-anchor logits and box deltas."""

    def __init__(self,in_channel,num_anchors):
        """
        Args:
            in_channel: channel count of every input feature map.
            num_anchors: anchors per location; the box head emits 4 values per anchor.
        """
        super().__init__()

        self.conv=nn.Conv2d(in_channel,in_channel,kernel_size=3,stride=1,padding=1)
        self.cls_logits=nn.Conv2d(in_channel,num_anchors,kernel_size=1,stride=1)
        self.box_pred=nn.Conv2d(in_channel,num_anchors*4,kernel_size=1,stride=1)

        # Small-std normal weights and zero biases for all three convolutions.
        for layer in (self.conv,self.cls_logits,self.box_pred):
            nn.init.normal_(layer.weight,std=0.01)
            nn.init.constant_(layer.bias,0)

    def forward(self,x):
        """Apply the head to each feature map in ``x``.

        Returns:
            ``(logits, box_reg)``: two lists with one tensor per input map.
        """
        hidden=[F.relu(self.conv(feature)) for feature in x]
        logits=[self.cls_logits(t) for t in hidden]
        box_reg=[self.box_pred(t) for t in hidden]
        return logits,box_reg
