<a href="https://colab.research.google.com/github/yoneken1/colab_pytorch_detection/blob/master/model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import torch
from torch import nn
from torchvision.models import vgg16,squeezenet1_1
from anchor import Anchor

class GLDet(nn.Module):
  """Single-scale detector on top of torchvision's SqueezeNet 1.1 features.

  One 3x3 conv + ReLU is applied to the backbone feature map, followed by two
  1x1 convolutions that emit box regressions (`loc`) and class scores (`cls`).
  """

  def __init__(self, num_classes):
    """Create backbone, anchor generator and prediction layers.

    Args:
      num_classes: number of object classes.  `cls` emits ``num_classes + 1``
        values per anchor, `loc` emits ``4 * num_classes`` values per anchor.
    """
    super().__init__()
    self.num_classes = num_classes

    backbone = squeezenet1_1(pretrained=True)
    self.features = backbone.features
    # Anchor is project-local; presumably it enumerates boxes per feature-map
    # cell -- confirm against anchor.py.
    self.anchor_gen = Anchor(scale_ratios=[1/2., 1., 1.5])

    # Entries 0 and 3 of the backbone feature stack are kept fixed.
    for frozen_idx in (0, 3):
      for param in self.features[frozen_idx].parameters():
        param.requires_grad = False

    anchors_per_cell = self.anchor_gen.num_anchors

    self.conv1 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
    self.relu1 = nn.ReLU()
    self.loc = nn.Conv2d(512, anchors_per_cell * 4 * num_classes,
                         kernel_size=1, stride=1, padding=0)
    self.cls = nn.Conv2d(512, anchors_per_cell * (num_classes + 1),
                         kernel_size=1, stride=1, padding=0)

    # Weight initialisation (order kept so seeded runs stay reproducible).
    nn.init.kaiming_normal_(self.conv1.weight, mode='fan_out', nonlinearity='relu')
    nn.init.constant_(self.conv1.bias, 0)
    nn.init.normal_(self.loc.weight, mean=0.0, std=0.001)
    nn.init.constant_(self.loc.bias, 0)
    nn.init.normal_(self.cls.weight, mean=0.0, std=0.01)
    nn.init.constant_(self.cls.bias, 0)

  def forward(self, x):
    """Run the detector.

    Returns:
      A tuple ``(pred_loc, pred_cls, anchor)`` where ``pred_loc`` is reshaped
      to ``(batch, -1, 4 * num_classes)``, ``pred_cls`` to
      ``(batch, -1, num_classes + 1)`` and ``anchor`` is whatever
      ``anchor_gen.get_anchor_boxes`` returns for the feature-map size.
    """
    feature_map = self.features(x)
    hidden = self.relu1(self.conv1(feature_map))

    raw_loc = self.loc(hidden)
    raw_cls = self.cls(hidden)

    # Move channels last so that each row of the view holds one prediction.
    loc_last = raw_loc.permute(0, 2, 3, 1).contiguous()
    pred_loc = loc_last.view(raw_loc.size(0), -1, 4 * self.num_classes)
    cls_last = raw_cls.permute(0, 2, 3, 1).contiguous()
    pred_cls = cls_last.view(raw_cls.size(0), -1, self.num_classes + 1)

    anchor = self.anchor_gen.get_anchor_boxes([feature_map.size()])

    return pred_loc, pred_cls, anchor
    
# device = torch.device('cuda')
device = torch.device('cpu')


def test():
  """Smoke test: push one random 600x600 image through a 6-class GLDet."""
  dummy_input = torch.randn((1, 3, 600, 600))
  net = GLDet(6)
  dummy_input, net = dummy_input.to(device), net.to(device)
  # Unpacking checks that forward() still returns exactly three values.
  pred_loc, pred_cls, anchor = net(dummy_input)


test()

  init.kaiming_uniform(m.weight.data)
  init.normal(m.weight.data, mean=0.0, std=0.01)


In [0]:
!pip install pretrainedmodels



In [0]:
import pretrainedmodels
import torch
from torch import nn

# Load the ImageNet-pretrained SE-ResNeXt50 and stack its five stages
# (layer0 .. layer4) into a single feature extractor.
model_name = 'se_resnext50_32x4d'

base_model = vars(pretrainedmodels)[model_name](num_classes=1000, pretrained='imagenet')
features = nn.Sequential(
  base_model.layer0,
  base_model.layer1,
  base_model.layer2,
  base_model.layer3,
  base_model.layer4,
)

print(base_model)

# Keep the first two stages fixed during training.
for stage in (features[0], features[1]):
  for p in stage.parameters():
    p.requires_grad = False


# Disabled alternative backbone (SqueezeNet 1.1), kept for reference:
#base_model = squeezenet1_1(pretrained=True)
#features = base_model.features
#for p in features[0].parameters():
#  p.requires_grad = False
#for p in features[3].parameters():
#  p.requires_grad = False


Downloading: "http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth" to /root/.torch/models/se_resnext50_32x4d-a260b3a4.pth
100%|██████████| 110559176/110559176 [00:06<00:00, 16722246.95it/s]


SENet(
  (layer0): Sequential(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU(inplace)
    (pool): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  )
  (layer1): Sequential(
    (0): SEResNeXtBottleneck(
      (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (se_module): SEModule(
        (avg_po

In [0]:
import torch
from torch import nn
from torchvision.models import vgg16,squeezenet1_1
from anchor import Anchor

class GCDet(nn.Module):
  """Single-scale detector with separate localisation and classification heads.

  A pretrained backbone (SE-ResNeXt50 from `pretrainedmodels` when
  ``model == 'senet'``, otherwise torchvision's SqueezeNet 1.1) yields one
  feature map.  Two independent stacks of 3x3 conv + ReLU layers feed the
  final `loc` and `cls` convolutions.
  """

  def __init__(self, num_classes, model='senet'):
    """Build backbone, heads and anchor generator.

    Args:
      num_classes: number of object classes.  `cls` emits ``num_classes + 1``
        values per anchor, `loc` emits ``4 * num_classes`` values per anchor.
      model: ``'senet'`` selects SE-ResNeXt50; any other value selects
        SqueezeNet 1.1.
    """
    super().__init__()
    self.num_classes = num_classes
    self.model = model

    if model == 'senet':
      model_name = 'se_resnext50_32x4d'
      base_model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet')
      self.features = nn.Sequential(base_model.layer0, base_model.layer1,
                                    base_model.layer2, base_model.layer3)
      frozen_stages = (self.features[0], self.features[1])
      in_plane = 1024
    else:
      base_model = squeezenet1_1(pretrained=True)
      self.features = base_model.features
      frozen_stages = (self.features[0], self.features[3])
      in_plane = 512
    h_plane = 256

    # The attribute is `requires_grad`; the SqueezeNet branch used to assign
    # `require_grad`, which only created an unused attribute and left the
    # layers trainable.
    for stage in frozen_stages:
      for p in stage.parameters():
        p.requires_grad = False

    # Anchor is project-local; num_anchors is presumably the number of anchors
    # per feature-map cell -- confirm against anchor.py.
    self.anchor_gen = Anchor()
    num_anchors = self.anchor_gen.num_anchors

    self.loc_head = self._make_head_layer(in_plane, h_plane, 3)
    self.cls_head = self._make_head_layer(in_plane, h_plane, 3)
    self.loc = nn.Conv2d(h_plane, num_anchors * 4 * num_classes, 3, 1, 1)
    self.cls = nn.Conv2d(h_plane, num_anchors * (num_classes+1), 3, 1, 1)

    self._normal_init(self.loc_head)
    self._normal_init(self.cls_head)

    self.loc.weight.data.normal_(0.0, 0.001)
    self.loc.bias.data.zero_()
    self.cls.weight.data.normal_(0.0, 0.01)
    self.cls.bias.data.zero_()

  def _make_head_layer(self, in_plane, h_plane, num_itr):
    """Return one input 3x3 conv + ReLU followed by `num_itr` more conv + ReLU pairs."""
    head_layer = [nn.Conv2d(in_plane, h_plane, 3, 1, 1), nn.ReLU(True)]
    for _ in range(num_itr):
      head_layer.append(nn.Conv2d(h_plane, h_plane, 3, 1, 1))
      head_layer.append(nn.ReLU(True))

    return nn.Sequential(*head_layer)

  def _normal_init(self, layer):
    """Kaiming-normal init for every Conv2d weight in `layer`; biases set to 0."""
    for m in layer.modules():
      if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
          nn.init.constant_(m.bias, 0)

  def train(self, mode=True):
    """Set training mode, keeping BatchNorm layers in eval mode for 'senet'.

    Returns:
      self, as `nn.Module.train` does, so ``model.train()`` and
      ``model.eval()`` can be chained.
    """
    super().train(mode)
    # Freeze BatchNorm statistics of the pretrained backbone.
    if mode and self.model == 'senet':
      for layer in self.modules():
        if isinstance(layer, nn.BatchNorm2d):
          layer.eval()
    return self

  def forward(self, x):
    """Run the detector.

    Returns:
      ``(pred_loc, pred_cls, anchor)``: ``pred_loc`` is reshaped to
      ``(batch, -1, 4 * num_classes)``, ``pred_cls`` to
      ``(batch, -1, num_classes + 1)``; ``anchor`` is the result of
      ``anchor_gen.get_anchor_boxes`` for the feature-map size.
    """
    fm = self.features(x)
    loc_h = self.loc_head(fm)
    cls_h = self.cls_head(fm)
    pred_loc = self.loc(loc_h)
    pred_cls = self.cls(cls_h)
    # Channels last, then one row per prediction.
    pred_loc = pred_loc.permute(0, 2, 3, 1).contiguous().view(pred_loc.size(0), -1, 4 * self.num_classes)
    pred_cls = pred_cls.permute(0, 2, 3, 1).contiguous().view(pred_cls.size(0), -1, self.num_classes+1)

    fm_size = fm.size()
    anchor = self.anchor_gen.get_anchor_boxes([fm_size])

    return pred_loc, pred_cls, anchor
    
# Prefer the GPU, but fall back to the CPU so the cell also runs on hosts
# without CUDA instead of failing in `.to(device)`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def testgc():
  """Smoke test: run a 6-class GCDet on one random 600x900 image and print shapes."""
  x = torch.randn((1,3,600,900))
  model = GCDet(6)
  x = x.to(device)
  model = model.to(device)
  model.train()
  pred_loc,pred_cls,anchor = model(x)
  print(pred_loc.size())
  print(pred_cls.size())
  print(anchor.size())
  # Sample anchors at offsets that are multiples of 9 and of 57*9
  # (presumably 9 anchors per cell and 57 cells per feature-map row -- confirm
  # against anchor.py).
  for idx in (0, 9, 18, 57*9, 57*9+9, 57*9-9):
    print(anchor[idx])


testgc()

torch.Size([1, 19494, 24])
torch.Size([1, 19494, 7])
torch.Size([19494, 4])
tensor([-56., -56.,  71.,  71.])
tensor([-40., -56.,  87.,  71.])
tensor([-24., -56., 103.,  71.])
tensor([-56., -40.,  71.,  87.])
tensor([-40., -40.,  87.,  87.])
tensor([840., -56., 967.,  71.])


In [0]:
import torch
from torch import nn
from torchvision.models import vgg16,squeezenet1_1,resnet34
from anchor import Anchor


class GCDet(nn.Module):
  """Multi-scale detector: ResNet-50 backbone with a top-down feature pyramid.

  Four 256-channel maps (p2, p3, p4, p5) are produced from the backbone stages
  and passed through shared localisation / classification heads.
  """

  def __init__(self, num_classes, model='resnet', class_agnostic=False):
    """Build backbone, pyramid layers, heads and anchor generator.

    Args:
      num_classes: number of object classes; `cls` emits ``num_classes + 1``
        values per anchor.
      model: backbone selector; only ``'resnet'`` is implemented.
      class_agnostic: when true `loc` emits 4 values per anchor, otherwise
        ``4 * num_classes``.

    Raises:
      ValueError: if `model` is not ``'resnet'``.
    """
    super().__init__()
    self.num_classes = num_classes
    self.model = model
    self.class_agnostic = class_agnostic

    if model == 'resnet':
      model_name = 'resnet50'

      base_model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet')
      self.layer1 = nn.Sequential(base_model.conv1, base_model.bn1, base_model.relu,
                                  base_model.maxpool, base_model.layer1)
      self.layer2 = base_model.layer2
      self.layer3 = base_model.layer3
      self.layer4 = base_model.layer4
      self.latlayer4 = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0)
      self.latlayer3 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0)
      self.toplayer3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
      self.latlayer2 = nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0)
      self.toplayer2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
      self.conv5 = nn.Conv2d(2048, 256, kernel_size=3, stride=2, padding=1)

      # The first three backbone stages stay fixed.
      for frozen in (self.layer1, self.layer2, self.layer3):
        for p in frozen.parameters():
          p.requires_grad = False
    else:
      # Without a backbone the layers used below do not exist; fail early with
      # a clear message instead of an AttributeError further down.
      raise ValueError("unsupported model {!r}; only 'resnet' is implemented".format(model))

    anchor_areas = [64*64., 128*128., 256*256., 512*512.]
    aspect_ratios = [1/1., 1/2., 2/1.]
    scale_ratios = [1., 1./pow(2., 1/3.), 1./pow(2., 2/3.)]
    anchor_base_sizes = [8., 16., 32., 64.]
    # Anchor is project-local; one entry per pyramid level is passed in.
    self.anchor_gen = Anchor(anchor_areas=anchor_areas,
                             aspect_ratios=aspect_ratios,
                             scale_ratios=scale_ratios,
                             anchor_base_sizes=anchor_base_sizes)

    num_anchors = self.anchor_gen.num_anchors

    self.loc_head = self._make_head_layer(256, 256, 2)
    self.cls_head = self._make_head_layer(256, 256, 2)
    if self.class_agnostic:
      self.loc = nn.Conv2d(256, num_anchors * 4, 3, 1, 1)
    else:
      self.loc = nn.Conv2d(256, num_anchors * 4 * num_classes, 3, 1, 1)
    self.cls = nn.Conv2d(256, num_anchors * (num_classes+1), 3, 1, 1)

    for block in (self.loc_head, self.cls_head,
                  self.latlayer4, self.latlayer3, self.toplayer3,
                  self.latlayer2, self.toplayer2, self.conv5):
      self._normal_init(block)

    self.loc.weight.data.normal_(0.0, 0.001)
    self.loc.bias.data.zero_()
    self.cls.weight.data.normal_(0.0, 0.01)
    self.cls.bias.data.zero_()

  def _make_head_layer(self, in_plane, h_plane, num_itr):
    """Return one input 3x3 conv + ReLU followed by `num_itr` more conv + ReLU pairs."""
    head_layer = [nn.Conv2d(in_plane, h_plane, 3, 1, 1), nn.ReLU(True)]
    for _ in range(num_itr):
      head_layer.append(nn.Conv2d(h_plane, h_plane, 3, 1, 1))
      head_layer.append(nn.ReLU(True))

    return nn.Sequential(*head_layer)

  def _normal_init(self, layer):
    """Kaiming-normal init for every Conv2d weight in `layer`; biases set to 0."""
    for m in layer.modules():
      if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
          nn.init.constant_(m.bias, 0)

  def train(self, mode=True):
    """Set training mode, keeping BatchNorm layers in eval mode for 'resnet'.

    Returns:
      self, as `nn.Module.train` does, so ``model.train()`` and
      ``model.eval()`` can be chained.
    """
    super().train(mode)
    # Freeze BatchNorm statistics of the pretrained backbone.
    if mode and self.model == 'resnet':
      for layer in self.modules():
        if isinstance(layer, nn.BatchNorm2d):
          layer.eval()
    return self

  def _upsample_add(self, x, y):
    """Bilinearly resize `x` to the spatial size of `y` and add `y`."""
    _, _, H, W = y.size()
    # `upsample` is deprecated in favour of `interpolate`; align_corners=False
    # is the default it already used, spelled out to avoid the warning.
    return nn.functional.interpolate(x, size=(H, W), mode='bilinear', align_corners=False) + y

  def forward(self, x):
    """Run the detector over all pyramid levels.

    Returns:
      ``(pred_locs, pred_clss, anchor)``: per-level predictions concatenated
      along dim 1.  ``pred_locs`` rows have 4 values (class-agnostic) or
      ``4 * num_classes``; ``pred_clss`` rows have ``num_classes + 1``.
      ``anchor`` is the result of ``anchor_gen.get_anchor_boxes`` for the list
      of feature-map sizes.
    """
    c1 = self.layer1(x)
    c2 = self.layer2(c1)  # original annotation: 8
    c3 = self.layer3(c2)  # original annotation: 16
    c4 = self.layer4(c3)  # original annotation: 32

    p5 = self.conv5(c4)   # original annotation: 64
    p4 = self.latlayer4(c4)
    p3 = self._upsample_add(p4, self.latlayer3(c3))
    p3 = self.toplayer3(p3)
    p2 = self._upsample_add(p3, self.latlayer2(c2))
    p2 = self.toplayer2(p2)

    loc_width = 4 if self.class_agnostic else 4 * self.num_classes

    pred_locs = []
    pred_clss = []
    fm_size = []
    for fm in [p2, p3, p4, p5]:
      pred_loc = self.loc(self.loc_head(fm))
      pred_cls = self.cls(self.cls_head(fm))
      # Channels last, then one row per prediction.
      pred_loc = pred_loc.permute(0, 2, 3, 1).contiguous().view(pred_loc.size(0), -1, loc_width)
      pred_cls = pred_cls.permute(0, 2, 3, 1).contiguous().view(pred_cls.size(0), -1, self.num_classes+1)

      pred_locs.append(pred_loc)
      pred_clss.append(pred_cls)
      fm_size.append(fm.size())
    pred_locs = torch.cat(pred_locs, 1)
    pred_clss = torch.cat(pred_clss, 1)
    anchor = self.anchor_gen.get_anchor_boxes(fm_size)

    return pred_locs, pred_clss, anchor
    
#device = torch.device('cuda')

def testgc():
  """Smoke test: run a 6-class GCDet on one random 600x900 image and print shapes."""
  image = torch.randn((1, 3, 600, 900))
  net = GCDet(6)
  image, net = image.to(device), net.to(device)
  net.train()
  pred_loc, pred_cls, anchor = net(image)

  # Shapes of the first sample's predictions and of the anchor tensor.
  for tensor in (pred_loc[0], pred_cls[0], anchor):
    print(tensor.size())

  # A few sample anchors at fixed offsets.
  for idx in (0, 9, 18, 57*9, 57*9+9, 57*9-9):
    print(anchor[idx])


testgc()

torch.Size([1, 1024, 38, 57])


  "See the documentation of nn.Upsample for details.".format(mode))


torch.Size([68052, 24])
torch.Size([68052, 7])
torch.Size([68052, 4])
tensor([-14., -14.,  17.,  17.])
tensor([-10., -30.,  21.,  33.])
tensor([ -2., -14.,  29.,  17.])
tensor([326., -30., 357.,  33.])
tensor([334., -14., 365.,  17.])
tensor([322., -14., 353.,  17.])


Anchorを修正（ｔ１７）

headをlとsに分ける


In [0]:
import torch
from torch import nn
from torchvision.models import vgg16,squeezenet1_1,resnet34
from anchor import Anchor
import pretrainedmodels
device = torch.device('cpu')

class GCDet(nn.Module):
  """Feature-pyramid detector with pooled global context and two head sets.

  On top of the ResNet-50 pyramid (p2..p5), adaptively pooled summaries of the
  last backbone stage are resized and concatenated to every level.  Levels p2
  and p3 use the `*_s` heads, p4 and p5 the `*_l` heads.
  """

  def __init__(self, num_classes, model='resnet', class_agnostic=False):
    """Build backbone, pyramid layers, context branches, heads and anchors.

    Args:
      num_classes: number of object classes; the `cls_*` layers emit
        ``num_classes + 1`` values per anchor.
      model: backbone selector; only ``'resnet'`` is implemented.
      class_agnostic: when true the `loc_*` layers emit 4 values per anchor,
        otherwise ``4 * num_classes``.

    Raises:
      ValueError: if `model` is not ``'resnet'``.
    """
    super().__init__()
    self.num_classes = num_classes
    self.model = model
    self.class_agnostic = class_agnostic

    if model == 'resnet':
      model_name = 'resnet50'

      base_model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet')
      self.layer1 = nn.Sequential(base_model.conv1, base_model.bn1, base_model.relu,
                                  base_model.maxpool, base_model.layer1)
      self.layer2 = base_model.layer2
      self.layer3 = base_model.layer3
      self.layer4 = base_model.layer4
      self.latlayer4 = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0)
      self.latlayer3 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0)
      self.toplayer3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
      self.latlayer2 = nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0)
      self.toplayer2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
      self.conv5 = nn.Conv2d(2048, 256, kernel_size=3, stride=2, padding=1)
      self.avg_pool_1x1 = nn.AdaptiveAvgPool2d((1, 1))
      self.avg_pool_3x3 = nn.AdaptiveAvgPool2d((3, 3))
      self.avg_pool_5x5 = nn.AdaptiveAvgPool2d((5, 5))
      self.rconv1 = nn.Conv2d(2048, 64, kernel_size=1, stride=1, padding=0)
      self.rconv3 = nn.Conv2d(2048, 64, kernel_size=1, stride=1, padding=0)
      self.rconv5 = nn.Conv2d(2048, 64, kernel_size=1, stride=1, padding=0)

      # Only the first two backbone stages stay fixed in this variant.
      for frozen in (self.layer1, self.layer2):
        for p in frozen.parameters():
          p.requires_grad = False
    else:
      # Without a backbone the layers used below do not exist; fail early with
      # a clear message instead of an AttributeError further down.
      raise ValueError("unsupported model {!r}; only 'resnet' is implemented".format(model))

    anchor_areas = [64*64., 128*128., 256*256., 512*512.]
    aspect_ratios = [1/1., 1/2., 2/1.]
    scale_ratios = [1., 1./pow(2., 1/3.), 1./pow(2., 2/3.)]
    anchor_base_sizes = [8., 16., 32., 64.]
    # Anchor is project-local; one entry per pyramid level is passed in.
    self.anchor_gen = Anchor(anchor_areas=anchor_areas,
                             aspect_ratios=aspect_ratios,
                             scale_ratios=scale_ratios,
                             anchor_base_sizes=anchor_base_sizes)

    num_anchors = self.anchor_gen.num_anchors

    # Each k x k pooled map has 64 channels (rconv*) and is reshaped in
    # forward() into 8x8 planes of 64 values, i.e. k*k planes per map:
    # 1 + 9 + 25 = 35 context channels are appended to each 256-channel level.
    head_in_plane = 256 + (1 * 1 + 3 * 3 + 5 * 5)

    self.loc_head_l = self._make_head_layer(head_in_plane, 256, 3)
    self.cls_head_l = self._make_head_layer(head_in_plane, 256, 3)
    self.loc_head_s = self._make_head_layer(head_in_plane, 256, 3)
    self.cls_head_s = self._make_head_layer(head_in_plane, 256, 3)

    if self.class_agnostic:
      loc_out_plane = num_anchors * 4
    else:
      loc_out_plane = num_anchors * 4 * num_classes
    self.loc_l = nn.Conv2d(256, loc_out_plane, 3, 1, 1)
    self.cls_l = nn.Conv2d(256, num_anchors * (num_classes+1), 3, 1, 1)
    self.loc_s = nn.Conv2d(256, loc_out_plane, 3, 1, 1)
    self.cls_s = nn.Conv2d(256, num_anchors * (num_classes+1), 3, 1, 1)

    for block in (self.loc_head_l, self.cls_head_l,
                  self.loc_head_s, self.cls_head_s,
                  self.latlayer4, self.latlayer3, self.toplayer3,
                  self.latlayer2, self.toplayer2, self.conv5,
                  self.rconv1, self.rconv3, self.rconv5):
      self._normal_init(block)

    self.loc_l.weight.data.normal_(0.0, 0.001)
    self.loc_l.bias.data.zero_()
    self.cls_l.weight.data.normal_(0.0, 0.01)
    self.cls_l.bias.data.zero_()
    self.loc_s.weight.data.normal_(0.0, 0.001)
    self.loc_s.bias.data.zero_()
    self.cls_s.weight.data.normal_(0.0, 0.01)
    self.cls_s.bias.data.zero_()

  def _make_head_layer(self, in_plane, h_plane, num_itr):
    """Return one input 3x3 conv + ReLU followed by `num_itr` more conv + ReLU pairs."""
    head_layer = [nn.Conv2d(in_plane, h_plane, 3, 1, 1), nn.ReLU(True)]
    for _ in range(num_itr):
      head_layer.append(nn.Conv2d(h_plane, h_plane, 3, 1, 1))
      head_layer.append(nn.ReLU(True))

    return nn.Sequential(*head_layer)

  def _normal_init(self, layer):
    """Kaiming-normal init for every Conv2d weight in `layer`; biases set to 0."""
    for m in layer.modules():
      if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
          nn.init.constant_(m.bias, 0)

  def _upsample_add(self, x, y):
    """Bilinearly resize `x` to the spatial size of `y` and add `y`."""
    _, _, H, W = y.size()
    # align_corners=False is the implicit default; stating it avoids the warning.
    return nn.functional.interpolate(x, size=(H, W), mode='bilinear', align_corners=False) + y

  def _upsample_cat(self, x, y):
    """Resize every tensor in list `x` to `y`'s spatial size and concatenate after `y` on dim 1."""
    _, _, H, W = y.size()
    layers = [y]
    for sx in x:
      layers.append(nn.functional.interpolate(sx, size=(H, W), mode='bilinear', align_corners=False))
    return torch.cat(layers, 1)

  def train(self, mode=True):
    """Set training mode, keeping BatchNorm layers in eval mode for 'resnet'.

    Returns:
      self, as `nn.Module.train` does, so ``model.train()`` and
      ``model.eval()`` can be chained.
    """
    super().train(mode)
    # Freeze BatchNorm statistics of the pretrained backbone.
    if mode and self.model == 'resnet':
      for layer in self.modules():
        if isinstance(layer, nn.BatchNorm2d):
          layer.eval()
    return self

  def forward(self, x):
    """Run the detector over all pyramid levels.

    Returns:
      ``(pred_locs, pred_clss, anchor)``: per-level predictions concatenated
      along dim 1.  ``pred_locs`` rows have 4 values (class-agnostic) or
      ``4 * num_classes``; ``pred_clss`` rows have ``num_classes + 1``.
      ``anchor`` is the result of ``anchor_gen.get_anchor_boxes`` for the list
      of feature-map sizes.
    """
    c1 = self.layer1(x)
    c2 = self.layer2(c1)  # original annotation: 8
    c3 = self.layer3(c2)  # original annotation: 16
    c4 = self.layer4(c3)  # original annotation: 32

    p5 = self.conv5(c4)   # original annotation: 64
    p4 = self.latlayer4(c4)
    p3 = self._upsample_add(p4, self.latlayer3(c3))
    p3 = self.toplayer3(p3)
    p2 = self._upsample_add(p3, self.latlayer2(c2))
    p2 = self.toplayer2(p2)

    # Pooled context: 64-channel k x k maps are regrouped into 8x8 planes.
    rmap1x1 = self.rconv1(self.avg_pool_1x1(c4))
    rmap3x3 = self.rconv3(self.avg_pool_3x3(c4))
    rmap5x5 = self.rconv5(self.avg_pool_5x5(c4))
    rmap1x1 = rmap1x1.permute(0, 2, 3, 1).contiguous().view(rmap1x1.size(0), -1, 8, 8)
    rmap3x3 = rmap3x3.permute(0, 2, 3, 1).contiguous().view(rmap3x3.size(0), -1, 8, 8)
    rmap5x5 = rmap5x5.permute(0, 2, 3, 1).contiguous().view(rmap5x5.size(0), -1, 8, 8)

    context = [rmap1x1, rmap3x3, rmap5x5]
    p2 = self._upsample_cat(context, p2)
    p3 = self._upsample_cat(context, p3)
    p4 = self._upsample_cat(context, p4)
    p5 = self._upsample_cat(context, p5)

    loc_width = 4 if self.class_agnostic else 4 * self.num_classes

    pred_locs = []
    pred_clss = []
    fm_size = []
    for fm_idx, fm in enumerate([p2, p3, p4, p5]):
      if fm_idx >= 2:
        # p4, p5 -> "l" heads
        pred_loc = self.loc_l(self.loc_head_l(fm))
        pred_cls = self.cls_l(self.cls_head_l(fm))
      else:
        # p2, p3 -> "s" heads
        pred_loc = self.loc_s(self.loc_head_s(fm))
        pred_cls = self.cls_s(self.cls_head_s(fm))

      # Channels last, then one row per prediction.
      pred_loc = pred_loc.permute(0, 2, 3, 1).contiguous().view(pred_loc.size(0), -1, loc_width)
      pred_cls = pred_cls.permute(0, 2, 3, 1).contiguous().view(pred_cls.size(0), -1, self.num_classes+1)

      pred_locs.append(pred_loc)
      pred_clss.append(pred_cls)
      fm_size.append(fm.size())
    pred_locs = torch.cat(pred_locs, 1)
    pred_clss = torch.cat(pred_clss, 1)
    anchor = self.anchor_gen.get_anchor_boxes(fm_size)

    return pred_locs, pred_clss, anchor
    
#device = torch.device('cuda')

def testgc():
  """Smoke test: run a 6-class GCDet on one random 600x900 image and print shapes."""
  image = torch.randn((1, 3, 600, 900))
  net = GCDet(6)
  image, net = image.to(device), net.to(device)
  net.train()
  pred_loc, pred_cls, anchor = net(image)

  # Shapes of the first sample's predictions and of the anchor tensor.
  for tensor in (pred_loc[0], pred_cls[0], anchor):
    print(tensor.size())

  # A few sample anchors at fixed offsets.
  for idx in (0, 9, 18, 57*9, 57*9+9, 57*9-9):
    print(anchor[idx])


testgc()

tensor([-56., -56.,  71.,  71.])
tensor([-40., -56.,  87.,  71.])
tensor([-24., -56., 103.,  71.])
tensor([-56., -40.,  71.,  87.])
tensor([-56., -56.,  71.,  71.])
tensor([-40., -56.,  87.,  71.])
tensor([-24., -56., 103.,  71.])
tensor([ -8., -56., 119.,  71.])


Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.torch/models/resnet50-19c8e357.pth
100%|██████████| 102502400/102502400 [00:03<00:00, 28015792.04it/s]
  "See the documentation of nn.Upsample for details.".format(mode))


torch.Size([102078, 24])
torch.Size([102078, 7])
torch.Size([102078, 4])
tensor([-28., -28.,  35.,  35.])
tensor([-20., -28.,  43.,  35.])
tensor([-12., -28.,  51.,  35.])
tensor([428., -28., 491.,  35.])
tensor([436., -28., 499.,  35.])
tensor([420., -28., 483.,  35.])


In [0]:
import torch
from torch import nn
from torchvision.models import vgg16,squeezenet1_1
from anchor import Anchor

device=torch.device('cpu')

class GCDet(nn.Module):
  """Feature-pyramid detector with an auxiliary "ar" branch ahead of the main heads.

  Every pyramid level (p2..p5, each with one pooled context channel appended)
  first passes through the `ar_*` heads, which emit 4 box values and 2 scores
  per anchor.  The hidden features of those heads are then fed to the main
  `loc` / `cls` heads.
  """

  def __init__(self, num_classes, model='resnet', class_agnostic=False):
    """Build backbone, pyramid layers, context branch, heads and anchors.

    Args:
      num_classes: number of object classes; `cls` emits ``num_classes + 1``
        values per anchor.
      model: backbone selector; only ``'resnet'`` is implemented.
      class_agnostic: when true `loc` emits 4 values per anchor, otherwise
        ``4 * num_classes``.

    Raises:
      ValueError: if `model` is not ``'resnet'``.
    """
    super().__init__()
    self.num_classes = num_classes
    self.model = model
    self.class_agnostic = class_agnostic

    if model == 'resnet':
      model_name = 'resnet50'

      base_model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet')
      self.layer1 = nn.Sequential(base_model.conv1, base_model.bn1, base_model.relu,
                                  base_model.maxpool, base_model.layer1)
      self.layer2 = base_model.layer2
      self.layer3 = base_model.layer3
      self.layer4 = base_model.layer4
      self.latlayer4 = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0)
      self.latlayer3 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0)
      self.toplayer3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
      self.latlayer2 = nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0)
      self.toplayer2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
      self.conv5 = nn.Conv2d(2048, 256, kernel_size=3, stride=2, padding=1)
      self.avg_pool_1x1 = nn.AdaptiveAvgPool2d((1, 1))
      self.rconv1 = nn.Conv2d(2048, 12, kernel_size=1, stride=1, padding=0)

      # Only the first two backbone stages stay fixed in this variant.
      for frozen in (self.layer1, self.layer2):
        for p in frozen.parameters():
          p.requires_grad = False
    else:
      # Without a backbone the layers used below do not exist; fail early with
      # a clear message instead of an AttributeError further down.
      raise ValueError("unsupported model {!r}; only 'resnet' is implemented".format(model))

    anchor_areas = [64*64., 128*128., 256*256., 512*512.]
    aspect_ratios = [1/1., 1/2., 2/1.]
    scale_ratios = [1., 1./pow(2., 1/3.), 1./pow(2., 2/3.)]
    anchor_base_sizes = [8., 16., 32., 64.]
    # Anchor is project-local; one entry per pyramid level is passed in.
    self.anchor_gen = Anchor(anchor_areas=anchor_areas,
                             aspect_ratios=aspect_ratios,
                             scale_ratios=scale_ratios,
                             anchor_base_sizes=anchor_base_sizes)

    num_anchors = self.anchor_gen.num_anchors

    # The 12-channel 1x1 pooled map is reshaped in forward() into a single
    # 3x4 plane, hence one extra input channel per pyramid level.
    self.ar_loc_head = self._make_head_layer(256+1, 256, 3)
    self.ar_cls_head = self._make_head_layer(256+1, 256, 3)
    # Must NOT be in-place: the tensors these ReLUs receive have already been
    # consumed by `ar_loc` / `ar_cls`, and modifying them in place invalidates
    # the values autograd saved for the backward pass.
    self.ar_loc_relu = nn.ReLU()
    self.ar_cls_relu = nn.ReLU()
    self.ar_loc = nn.Conv2d(256, num_anchors * 4, 3, 1, 1)
    self.ar_cls = nn.Conv2d(256, num_anchors * 2, 3, 1, 1)

    self.loc_head = self._make_head_layer(256, 256, 4)
    self.cls_head = self._make_head_layer(256, 256, 4)

    if self.class_agnostic:
      loc_out_plane = num_anchors * 4
    else:
      loc_out_plane = num_anchors * 4 * num_classes
    self.loc = nn.Conv2d(256, loc_out_plane, 3, 1, 1)
    self.cls = nn.Conv2d(256, num_anchors * (num_classes+1), 3, 1, 1)

    for block in (self.ar_loc_head, self.ar_cls_head,
                  self.loc_head, self.cls_head,
                  self.latlayer4, self.latlayer3, self.toplayer3,
                  self.latlayer2, self.toplayer2, self.conv5,
                  self.rconv1):
      self._normal_init(block)

    self.ar_loc.weight.data.normal_(0.0, 0.001)
    self.ar_loc.bias.data.zero_()
    self.ar_cls.weight.data.normal_(0.0, 0.01)
    self.ar_cls.bias.data.zero_()
    self.loc.weight.data.normal_(0.0, 0.001)
    self.loc.bias.data.zero_()
    self.cls.weight.data.normal_(0.0, 0.01)
    self.cls.bias.data.zero_()

  def _make_head_layer(self, in_plane, h_plane, num_itr):
    """Return `num_itr` 3x3 conv + ReLU pairs; only the first conv takes `in_plane` channels."""
    head_layer = []

    for i in range(num_itr):
      src_plane = in_plane if i == 0 else h_plane
      head_layer.append(nn.Conv2d(src_plane, h_plane, 3, 1, 1))
      head_layer.append(nn.ReLU(True))

    return nn.Sequential(*head_layer)

  def _normal_init(self, layer):
    """Kaiming-normal init for every Conv2d weight in `layer`; biases set to 0."""
    for m in layer.modules():
      if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
          nn.init.constant_(m.bias, 0)

  def _upsample_add(self, x, y):
    """Bilinearly resize `x` to the spatial size of `y` and add `y`."""
    _, _, H, W = y.size()
    # align_corners=False is the implicit default; stating it avoids the warning.
    return nn.functional.interpolate(x, size=(H, W), mode='bilinear', align_corners=False) + y

  def _upsample_cat(self, x, y):
    """Resize every tensor in list `x` to `y`'s spatial size and concatenate after `y` on dim 1."""
    _, _, H, W = y.size()
    layers = [y]
    for sx in x:
      layers.append(nn.functional.interpolate(sx, size=(H, W), mode='bilinear', align_corners=False))
    return torch.cat(layers, 1)

  def train(self, mode=True):
    """Set training mode, keeping BatchNorm layers in eval mode for 'resnet'.

    Returns:
      self, as `nn.Module.train` does, so ``model.train()`` and
      ``model.eval()`` can be chained.
    """
    super().train(mode)
    # Freeze BatchNorm statistics of the pretrained backbone.
    if mode and self.model == 'resnet':
      for layer in self.modules():
        if isinstance(layer, nn.BatchNorm2d):
          layer.eval()
    return self

  def forward(self, x):
    """Run the detector over all pyramid levels.

    Returns:
      ``(pred_locs, pred_clss, anchor, ar_locs, ar_clss)``.  Per-level
      predictions are concatenated along dim 1.  ``pred_locs`` rows have 4
      values (class-agnostic) or ``4 * num_classes``; ``pred_clss`` rows have
      ``num_classes + 1``; ``ar_locs`` rows have 4 and ``ar_clss`` rows 2.
      ``anchor`` is the result of ``anchor_gen.get_anchor_boxes`` for the list
      of feature-map sizes.
    """
    c1 = self.layer1(x)
    c2 = self.layer2(c1)  # original annotation: 8
    c3 = self.layer3(c2)  # original annotation: 16
    c4 = self.layer4(c3)  # original annotation: 32

    p5 = self.conv5(c4)   # original annotation: 64
    p4 = self.latlayer4(c4)
    p3 = self._upsample_add(p4, self.latlayer3(c3))
    p3 = self.toplayer3(p3)
    p2 = self._upsample_add(p3, self.latlayer2(c2))
    p2 = self.toplayer2(p2)

    # Pooled context: 12 channels at 1x1 regrouped into one 3x4 plane.
    rmap1x1 = self.rconv1(self.avg_pool_1x1(c4))
    rmap1x1 = rmap1x1.permute(0, 2, 3, 1).contiguous().view(rmap1x1.size(0), -1, 3, 4)

    p2 = self._upsample_cat([rmap1x1], p2)
    p3 = self._upsample_cat([rmap1x1], p3)
    p4 = self._upsample_cat([rmap1x1], p4)
    p5 = self._upsample_cat([rmap1x1], p5)

    loc_width = 4 if self.class_agnostic else 4 * self.num_classes

    ar_locs = []
    ar_clss = []
    pred_locs = []
    pred_clss = []
    fm_size = []
    for fm in [p2, p3, p4, p5]:

      ar_loc_h = self.ar_loc_head(fm)
      ar_cls_h = self.ar_cls_head(fm)
      pred_ar_loc = self.ar_loc(ar_loc_h)
      pred_ar_cls = self.ar_cls(ar_cls_h)
      pred_ar_loc = pred_ar_loc.permute(0, 2, 3, 1).contiguous().view(pred_ar_loc.size(0), -1, 4)
      pred_ar_cls = pred_ar_cls.permute(0, 2, 3, 1).contiguous().view(pred_ar_cls.size(0), -1, 2)

      # The main heads consume the hidden features of the "ar" heads.
      loc_h = self.loc_head(self.ar_loc_relu(ar_loc_h))
      cls_h = self.cls_head(self.ar_cls_relu(ar_cls_h))
      pred_loc = self.loc(loc_h)
      pred_cls = self.cls(cls_h)

      # Channels last, then one row per prediction.
      pred_loc = pred_loc.permute(0, 2, 3, 1).contiguous().view(pred_loc.size(0), -1, loc_width)
      pred_cls = pred_cls.permute(0, 2, 3, 1).contiguous().view(pred_cls.size(0), -1, self.num_classes+1)

      pred_locs.append(pred_loc)
      pred_clss.append(pred_cls)
      ar_locs.append(pred_ar_loc)
      ar_clss.append(pred_ar_cls)
      fm_size.append(fm.size())
    pred_locs = torch.cat(pred_locs, 1)
    pred_clss = torch.cat(pred_clss, 1)
    ar_locs = torch.cat(ar_locs, 1)
    ar_clss = torch.cat(ar_clss, 1)
    anchor = self.anchor_gen.get_anchor_boxes(fm_size)

    return pred_locs, pred_clss, anchor, ar_locs, ar_clss
  
def testgc():
  """Smoke test: run a 6-class GCDet on one random 600x900 image and print shapes."""
  image = torch.randn((1, 3, 600, 900))
  net = GCDet(6)
  image, net = image.to(device), net.to(device)
  net.train()
  pred_loc, pred_cls, anchor, ar_loc, ar_cls = net(image)

  # Shapes of the first sample's predictions and of the anchor tensor.
  for tensor in (pred_loc[0], pred_cls[0], ar_loc[0], ar_cls[0], anchor):
    print(tensor.size())

  # A few sample anchors at fixed offsets.
  for idx in (0, 9, 18, 57*9, 57*9+9, 57*9-9):
    print(anchor[idx])


testgc()

  "See the documentation of nn.Upsample for details.".format(mode))


torch.Size([102078, 24])
torch.Size([102078, 7])
torch.Size([102078, 4])
torch.Size([102078, 2])
torch.Size([102078, 4])
tensor([-28., -28.,  35.,  35.])
tensor([-20., -28.,  43.,  35.])
tensor([-12., -28.,  51.,  35.])
tensor([428., -28., 491.,  35.])
tensor([436., -28., 499.,  35.])
tensor([420., -28., 483.,  35.])


In [0]:
import torch
import torch.nn as nn
import torch.nn.init as init

class L2Norm(nn.Module):
    """L2-normalise a 4-D tensor over dim 1 and rescale it per channel.

    Each position of the input is divided by its L2 norm taken over dim 1
    (plus a small epsilon), then multiplied by a learnable weight with one
    entry per channel.

    Args:
        n_channels: size of dim 1 of the expected input, and the length of
            the learnable ``weight`` vector.
        scale: initial value written into every entry of ``weight``.
    """

    def __init__(self,n_channels, scale):
        super(L2Norm,self).__init__()
        self.n_channels = n_channels
        # Stored as given: the previous ``scale or None`` turned a scale of
        # 0 into None, which then failed during initialisation.
        self.gamma = scale
        self.eps = 1e-10
        # Explicitly uninitialised storage; reset_parameters() fills it.
        self.weight = nn.Parameter(torch.empty(self.n_channels))
        self.reset_parameters()

    def reset_parameters(self):
        """Set every entry of ``weight`` to the initial scale ``gamma``."""
        # ``init.constant`` is the deprecated alias of ``init.constant_``.
        init.constant_(self.weight, self.gamma)

    def forward(self, x):
        """Return the normalised and rescaled tensor, leaving ``x`` untouched.

        ``x`` must be 4-D; the weight is broadcast as ``(1, C, 1, 1)``.
        """
        norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + self.eps
        # Out-of-place division: ``x /= norm`` would overwrite the caller's
        # tensor and raises on a leaf tensor that requires grad.
        normalised = x / norm
        return self.weight.view(1, -1, 1, 1) * normalised

In [0]:
from torchvision.models.resnet import BasicBlock, Bottleneck
from anchor import Anchor
import pretrainedmodels
device = torch.device('cpu')

def conv1x1(in_planes, out_planes, stride=1):
  """Build a bias-free 1x1 convolution from in_planes to out_planes channels."""
  pointwise = nn.Conv2d(
      in_channels=in_planes,
      out_channels=out_planes,
      kernel_size=1,
      stride=stride,
      bias=False,
  )
  return pointwise

class GCRefDet(nn.Module):
  """Multi-scale detector with "ar" heads on backbone features and "od"
  heads on a top-down pathway.

  A pretrained backbone (resnet18 / resnet34 / se_resnext50_32x4d, loaded
  through ``pretrainedmodels``) provides layer1..layer4; an extra residual
  stage ``layer5`` is appended.  The four feature maps c2..c5 feed:

  * ``ar_*`` heads: 4 box values and 2 scores per anchor, and
  * ``od_*`` heads: 4 box values and ``num_classes + 1`` scores per anchor,
    computed on 256-channel maps o1..o4 built top-down from c5 to c2.

  Args:
    num_classes: number of classes; the od score head emits
      ``num_classes + 1`` values per anchor.
    model: ``'resnet18'``, ``'resnet34'`` or ``'senet'``.
    class_agnostic: stored on the instance only; nothing in this class
      reads it (the od box head always emits 4 values per anchor).

  Raises:
    ValueError: if ``model`` is not one of the supported names.
  """

  _SUPPORTED_MODELS = ('resnet18', 'resnet34', 'senet')

  def __init__(self, num_classes, model='resnet18', class_agnostic=False):
    super(GCRefDet, self).__init__()
    # Fail early: an unknown name used to fall through both branches and
    # surface later as a NameError on ``base_model``.
    if model not in self._SUPPORTED_MODELS:
      raise ValueError("unsupported model %r; expected one of %s"
                       % (model, ', '.join(self._SUPPORTED_MODELS)))

    self.num_classes = num_classes
    self.model = model
    self.class_agnostic = class_agnostic

    if model in ('resnet18', 'resnet34'):
      model_name = model
      layer5_num = 2
      plane_size = [128, 256, 512, 512]
      self.inplanes = 512
      self.layer5 = self._make_residual_layer(BasicBlock, 512, layer5_num, stride=2)  # 1/64
      base_model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet')
      self.layer1 = nn.Sequential(base_model.conv1, base_model.bn1, base_model.relu,
                                  base_model.maxpool, base_model.layer1)  # 1/4
    else:  # 'senet'
      model_name = 'se_resnext50_32x4d'
      layer5_num = 3
      plane_size = [512, 1024, 2048, 512]
      self.inplanes = 2048
      self.layer5 = self._make_residual_layer(Bottleneck, 128, layer5_num, stride=2)  # 1/64
      base_model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet')
      self.layer1 = nn.Sequential(base_model.layer0, base_model.layer1)  # 1/4

    self.layer2 = base_model.layer2  # 1/8
    self.layer3 = base_model.layer3  # 1/16
    self.layer4 = base_model.layer4  # 1/32

    # Freeze the early backbone stages; layer4 and layer5 stay trainable.
    for frozen in (self.layer1, self.layer2, self.layer3):
      for p in frozen.parameters():
        p.requires_grad = False

    anchor_areas = [32*32., 64*64., 128*128., 256*256.]
    aspect_ratios = [1/1., 1/2., 2/1.]
    scale_ratios = [1.]
    anchor_base_sizes = [8., 16., 32., 64.]
    self.anchor_gen = Anchor(anchor_areas=anchor_areas,
                             aspect_ratios=aspect_ratios,
                             scale_ratios=scale_ratios,
                             anchor_base_sizes=anchor_base_sizes)

    num_anchors = self.anchor_gen.num_anchors

    # One head per feature level, in the order c2, c3, c4, c5.
    self.ar_loc_heads = nn.ModuleList(
        [self._make_head_layer(planes, num_anchors * 4) for planes in plane_size])
    self.ar_cls_heads = nn.ModuleList(
        [self._make_head_layer(planes, num_anchors * 2) for planes in plane_size])

    # One head per top-down level o1..o4, all of which have 256 channels.
    self.od_loc_heads = nn.ModuleList(
        [self._make_head_layer(256, num_anchors * 4) for _ in plane_size])
    self.od_cls_heads = nn.ModuleList(
        [self._make_head_layer(256, num_anchors * (num_classes+1)) for _ in plane_size])

    # transN maps backbone level c(N+1) to 256 channels.
    self.trans4 = self._make_trans_layer(plane_size[3])
    self.trans3 = self._make_trans_layer(plane_size[2])
    self.trans2 = self._make_trans_layer(plane_size[1])
    self.trans1 = self._make_trans_layer(plane_size[0])

    # kernel 2 / stride 2 transposed convs exactly double height and width.
    self.up4 = nn.ConvTranspose2d(256, 256, kernel_size=2, stride=2, padding=0)
    self.up3 = nn.ConvTranspose2d(256, 256, kernel_size=2, stride=2, padding=0)
    self.up2 = nn.ConvTranspose2d(256, 256, kernel_size=2, stride=2, padding=0)

    self.latent4 = self._make_latent_layer()
    self.latent3 = self._make_latent_layer()
    self.latent2 = self._make_latent_layer()
    self.latent1 = self._make_latent_layer()

    # Re-initialise every newly created (non-pretrained) module.
    for fresh in (self.layer5,
                  self.ar_loc_heads, self.ar_cls_heads,
                  self.od_loc_heads, self.od_cls_heads,
                  self.trans4, self.trans3, self.trans2, self.trans1,
                  self.up4, self.up3, self.up2,
                  self.latent4, self.latent3, self.latent2, self.latent1):
      self._normal_init(fresh)

  def _make_trans_layer(self, in_planes):
    """Return conv3x3 -> ReLU -> conv3x3 mapping in_planes to 256 channels."""
    return nn.Sequential(nn.Conv2d(in_planes, 256, kernel_size=3, stride=1, padding=1),
                         nn.ReLU(inplace=True),
                         nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1))

  def _make_latent_layer(self):
    """Return ReLU -> conv3x3 -> ReLU on 256 channels."""
    return nn.Sequential(nn.ReLU(inplace=True),
                         nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
                         nn.ReLU(inplace=True))

  def _make_residual_layer(self, block, planes, blocks, stride=1):
    """Stack ``blocks`` residual blocks, updating ``self.inplanes``.

    A 1x1-conv + BatchNorm shortcut is attached to the first block when the
    stride or the channel count changes.
    """
    downsample = None
    if stride != 1 or self.inplanes != planes * block.expansion:
      downsample = nn.Sequential(
          conv1x1(self.inplanes, planes * block.expansion, stride),
          nn.BatchNorm2d(planes * block.expansion),
      )

    layers = [block(self.inplanes, planes, stride, downsample)]
    self.inplanes = planes * block.expansion
    for _ in range(1, blocks):
      layers.append(block(self.inplanes, planes))

    return nn.Sequential(*layers)

  def _make_head_layer(self, in_planes, out_planes, planes = 0, num_itr = 0):
    """Build a prediction head of 3x3 convs ending in ``out_planes`` channels.

    With ``num_itr`` <= 1 the head is a single conv in_planes -> out_planes.
    With ``num_itr`` >= 2 it is ``num_itr`` convs (hidden width ``planes``)
    with a ReLU after every conv except the last.
    """
    # num_itr == 1 used to yield conv(in_planes, planes) + ReLU and never
    # reach out_planes; it is now treated as the single-conv head.
    if num_itr <= 1:
      return nn.Sequential(nn.Conv2d(in_planes, out_planes, 3, 1, 1))

    head_layer = [nn.Conv2d(in_planes, planes, 3, 1, 1), nn.ReLU(True)]
    for _ in range(num_itr - 2):
      head_layer.append(nn.Conv2d(planes, planes, 3, 1, 1))
      head_layer.append(nn.ReLU(True))
    head_layer.append(nn.Conv2d(planes, out_planes, 3, 1, 1))

    return nn.Sequential(*head_layer)

  def _normal_init(self, layer):
    """Kaiming-initialise convs in ``layer`` and zero each block's last BN weight."""
    for m in layer.modules():
      if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
          nn.init.constant_(m.bias, 0)
      elif isinstance(m, Bottleneck):
        nn.init.constant_(m.bn3.weight, 0)
      elif isinstance(m, BasicBlock):
        nn.init.constant_(m.bn2.weight, 0)

  def _upsample_add(self, x, y):
    """Bilinearly resize ``x`` to the spatial size of ``y`` and add them."""
    _, _, H, W = y.size()
    # align_corners=False is the library default; stating it avoids the
    # "default upsampling behavior" warning without changing the result.
    return nn.functional.interpolate(x, size=(H, W), mode='bilinear', align_corners=False) + y

  def _upsample_cat(self, x, y):
    """Resize every tensor in ``x`` to ``y``'s size and concat after ``y`` on dim 1."""
    _, _, H, W = y.size()
    layers = [y]
    for sx in x:
      layers.append(nn.functional.interpolate(sx, size=(H, W), mode='bilinear', align_corners=False))
    return torch.cat(layers, 1)

  def train(self, mode=True):
    """Switch training mode and return ``self``, like ``nn.Module.train``."""
    # nn.Module.eval() returns the value of train(False); returning None
    # here made both ``model.train()`` and ``model.eval()`` evaluate to None.
    # (BatchNorm freezing was sketched here previously but is not enabled.)
    return super(GCRefDet, self).train(mode)

  @staticmethod
  def _flatten_pred(pred, last_dim):
    """Reshape a (N, C, H, W) head output to (N, -1, last_dim), channels last."""
    return pred.permute(0, 2, 3, 1).contiguous().view(pred.size(0), -1, last_dim)

  def forward(self,x):
    """Run the detector.

    Returns:
      ``(ar_locs, ar_clss, od_locs, od_clss, anchor)`` where the first four
      are concatenated over the four levels along dim 1 with last dims 4,
      2, 4 and ``num_classes + 1``; ``anchor`` is whatever
      ``anchor_gen.get_anchor_boxes`` returns for the four feature sizes.

    The top-down sums add ``transN(c)`` to an exactly 2x-upsampled coarser
    map, so each level must be exactly twice the size of the next one
    (presumably satisfied when input H and W are multiples of 64 — confirm).
    """
    c1 = self.layer1(x)
    c2 = self.layer2(c1)  # 1/8
    c3 = self.layer3(c2)  # 1/16
    c4 = self.layer4(c3)  # 1/32
    c5 = self.layer5(c4)  # 1/64

    ar_locs = []
    ar_clss = []
    fm_size = []
    for fm, ar_loc_head, ar_cls_head in zip([c2, c3, c4, c5], self.ar_loc_heads, self.ar_cls_heads):
      ar_locs.append(self._flatten_pred(ar_loc_head(fm), 4))
      ar_clss.append(self._flatten_pred(ar_cls_head(fm), 2))
      fm_size.append(fm.size())
    ar_locs = torch.cat(ar_locs, 1)
    ar_clss = torch.cat(ar_clss, 1)
    anchor = self.anchor_gen.get_anchor_boxes(fm_size)

    # Top-down pathway, coarsest level first.
    o4 = self.latent4(self.trans4(c5))
    o3 = self.latent3(self.trans3(c4) + self.up4(o4))
    o2 = self.latent2(self.trans2(c3) + self.up3(o3))
    o1 = self.latent1(self.trans1(c2) + self.up2(o2))

    od_locs = []
    od_clss = []
    for fm, od_loc_head, od_cls_head in zip([o1, o2, o3, o4], self.od_loc_heads, self.od_cls_heads):
      od_locs.append(self._flatten_pred(od_loc_head(fm), 4))
      od_clss.append(self._flatten_pred(od_cls_head(fm), self.num_classes+1))
    od_locs = torch.cat(od_locs, 1)
    od_clss = torch.cat(od_clss, 1)

    return ar_locs, ar_clss, od_locs, od_clss, anchor
  
def testgc():
  """Smoke-test GCRefDet (senet backbone) on one random 576x960 image.

  Prints the per-image size of each prediction tensor, the size of the
  anchor tensor, and a handful of hand-picked anchor boxes for eyeballing.
  """
  # Input is drawn before the model is built, as in the original ordering.
  dummy = torch.randn((1,3, 576, 960))
  net = GCRefDet(6,model='senet')
  dummy = dummy.to(device)
  net = net.to(device)
  net.train()

  ar_loc, ar_cls, od_loc, od_cls, anchor = net(dummy)

  # Index 0 selects the single image in the batch.
  for batched in (ar_loc, ar_cls, od_loc, od_cls):
    print(batched[0].size())

  print(anchor.size())
  for idx in (0, 9, 18, 57*9, 57*9+9, 57*9-9):
    print(anchor[idx])

testgc()

torch.Size([34425, 4])
torch.Size([34425, 2])
torch.Size([34425, 4])
torch.Size([34425, 7])
torch.Size([34425, 4])
tensor([-12., -12.,  19.,  19.])
tensor([ 12., -12.,  43.,  19.])
tensor([ 36., -12.,  67.,  19.])
tensor([396.,  -4., 427.,  27.])
tensor([420.,  -4., 451.,  27.])
tensor([372.,  -4., 403.,  27.])


In [0]:
# Scratch check: range(1, 1) is empty, so the loop body never runs and
# nothing is printed.
for i in range(1,1):
  print(i)

In [0]:
# Colab-only upload step; the result is kept in `uploaded`.  The recorded
# cell output shows anchor.py being saved, which the later
# `from anchor import Anchor` depends on.
from google.colab import files
uploaded = files.upload()

Saving anchor.py to anchor.py


In [0]:
from torchvision.models.resnet import BasicBlock, Bottleneck
from anchor import Anchor
import pretrainedmodels
device = torch.device('cpu')

def conv1x1(in_planes, out_planes, stride=1):
  """Build a bias-free 1x1 convolution from in_planes to out_planes channels."""
  pointwise = nn.Conv2d(
      in_channels=in_planes,
      out_channels=out_planes,
      kernel_size=1,
      stride=stride,
      bias=False,
  )
  return pointwise

class GCRefDet(nn.Module):
  """Multi-scale detector with "ar" heads on backbone features and "od"
  heads on a top-down pathway.

  A pretrained backbone (resnet18 / resnet34 / se_resnext50_32x4d, loaded
  through ``pretrainedmodels``) provides layer1..layer4, all of which are
  frozen; an extra trainable residual stage ``layer5`` is appended.  The
  four feature maps c2..c5 feed:

  * ``ar_*`` heads: 4 box values and 2 scores per anchor, and
  * ``od_*`` heads: 4 box values and ``num_classes + 1`` scores per anchor,
    computed on 256-channel maps o1..o4 built top-down from c5 to c2.

  Args:
    num_classes: number of classes; the od score head emits
      ``num_classes + 1`` values per anchor.
    model: ``'resnet18'``, ``'resnet34'`` or ``'senet'``.
    class_agnostic: stored on the instance only; nothing in this class
      reads it (the od box head always emits 4 values per anchor).

  Raises:
    ValueError: if ``model`` is not one of the supported names.
  """

  _SUPPORTED_MODELS = ('resnet18', 'resnet34', 'senet')

  def __init__(self, num_classes, model='resnet18', class_agnostic=False):
    super(GCRefDet, self).__init__()
    # Fail early: an unknown name used to fall through both branches and
    # surface later as a NameError on ``base_model``.
    if model not in self._SUPPORTED_MODELS:
      raise ValueError("unsupported model %r; expected one of %s"
                       % (model, ', '.join(self._SUPPORTED_MODELS)))

    self.num_classes = num_classes
    self.model = model
    self.class_agnostic = class_agnostic

    if model in ('resnet18', 'resnet34'):
      model_name = model
      layer5_num = 2
      plane_size = [128, 256, 512, 512]
      self.inplanes = 512
      self.layer5 = self._make_residual_layer(BasicBlock, 512, layer5_num, stride=2)  # 1/64
      base_model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet')
      self.layer1 = nn.Sequential(base_model.conv1, base_model.bn1, base_model.relu,
                                  base_model.maxpool, base_model.layer1)  # 1/4
    else:  # 'senet'
      model_name = 'se_resnext50_32x4d'
      layer5_num = 3
      plane_size = [512, 1024, 2048, 512]
      self.inplanes = 2048
      self.layer5 = self._make_residual_layer(Bottleneck, 128, layer5_num, stride=2)  # 1/64
      base_model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet')
      self.layer1 = nn.Sequential(base_model.layer0, base_model.layer1)  # 1/4

    self.layer2 = base_model.layer2  # 1/8
    self.layer3 = base_model.layer3  # 1/16
    self.layer4 = base_model.layer4  # 1/32

    # Freeze the whole pretrained backbone; only layer5 and the heads train.
    for frozen in (self.layer1, self.layer2, self.layer3, self.layer4):
      for p in frozen.parameters():
        p.requires_grad = False

    anchor_areas = [64*64., 128*128., 256*256., 512*512.]
    aspect_ratios = [1/1., 1/2., 2/1.]
    scale_ratios = [1., 1./pow(2., 1/3.), 1./pow(2., 2/3.)]
    anchor_base_sizes = [8., 16., 32., 64.]
    self.anchor_gen = Anchor(anchor_areas=anchor_areas,
                             aspect_ratios=aspect_ratios,
                             scale_ratios=scale_ratios,
                             anchor_base_sizes=anchor_base_sizes)

    num_anchors = self.anchor_gen.num_anchors

    # One head per feature level, in the order c2, c3, c4, c5.
    self.ar_loc_heads = nn.ModuleList(
        [self._make_head_layer(planes, num_anchors * 4) for planes in plane_size])
    self.ar_cls_heads = nn.ModuleList(
        [self._make_head_layer(planes, num_anchors * 2) for planes in plane_size])

    # One head per top-down level o1..o4, all of which have 256 channels.
    self.od_loc_heads = nn.ModuleList(
        [self._make_head_layer(256, num_anchors * 4) for _ in plane_size])
    self.od_cls_heads = nn.ModuleList(
        [self._make_head_layer(256, num_anchors * (num_classes+1)) for _ in plane_size])

    # transN maps backbone level c(N+1) to 256 channels.
    self.trans4 = self._make_trans_layer(plane_size[3])
    self.trans3 = self._make_trans_layer(plane_size[2])
    self.trans2 = self._make_trans_layer(plane_size[1])
    self.trans1 = self._make_trans_layer(plane_size[0])

    # kernel 2 / stride 2 transposed convs exactly double height and width.
    self.up4 = nn.ConvTranspose2d(256, 256, kernel_size=2, stride=2, padding=0)
    self.up3 = nn.ConvTranspose2d(256, 256, kernel_size=2, stride=2, padding=0)
    self.up2 = nn.ConvTranspose2d(256, 256, kernel_size=2, stride=2, padding=0)

    self.latent4 = self._make_latent_layer()
    self.latent3 = self._make_latent_layer()
    self.latent2 = self._make_latent_layer()
    self.latent1 = self._make_latent_layer()

    # Re-initialise every newly created (non-pretrained) module.
    for fresh in (self.layer5,
                  self.ar_loc_heads, self.ar_cls_heads,
                  self.od_loc_heads, self.od_cls_heads,
                  self.trans4, self.trans3, self.trans2, self.trans1,
                  self.up4, self.up3, self.up2,
                  self.latent4, self.latent3, self.latent2, self.latent1):
      self._normal_init(fresh)

  def _make_trans_layer(self, in_planes):
    """Return conv3x3 -> ReLU -> conv3x3 mapping in_planes to 256 channels."""
    return nn.Sequential(nn.Conv2d(in_planes, 256, kernel_size=3, stride=1, padding=1),
                         nn.ReLU(inplace=True),
                         nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1))

  def _make_latent_layer(self):
    """Return ReLU -> conv3x3 -> ReLU on 256 channels."""
    return nn.Sequential(nn.ReLU(inplace=True),
                         nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
                         nn.ReLU(inplace=True))

  def _make_residual_layer(self, block, planes, blocks, stride=1):
    """Stack ``blocks`` residual blocks, updating ``self.inplanes``.

    A 1x1-conv + BatchNorm shortcut is attached to the first block when the
    stride or the channel count changes.
    """
    downsample = None
    if stride != 1 or self.inplanes != planes * block.expansion:
      downsample = nn.Sequential(
          conv1x1(self.inplanes, planes * block.expansion, stride),
          nn.BatchNorm2d(planes * block.expansion),
      )

    layers = [block(self.inplanes, planes, stride, downsample)]
    self.inplanes = planes * block.expansion
    for _ in range(1, blocks):
      layers.append(block(self.inplanes, planes))

    return nn.Sequential(*layers)

  def _make_head_layer(self, in_planes, out_planes, planes = 0, num_itr = 0):
    """Build a prediction head of 3x3 convs ending in ``out_planes`` channels.

    With ``num_itr`` <= 1 the head is a single conv in_planes -> out_planes.
    With ``num_itr`` >= 2 it is ``num_itr`` convs (hidden width ``planes``)
    with a ReLU after every conv except the last.
    """
    # num_itr == 1 used to yield conv(in_planes, planes) + ReLU and never
    # reach out_planes; it is now treated as the single-conv head.
    if num_itr <= 1:
      return nn.Sequential(nn.Conv2d(in_planes, out_planes, 3, 1, 1))

    head_layer = [nn.Conv2d(in_planes, planes, 3, 1, 1), nn.ReLU(True)]
    for _ in range(num_itr - 2):
      head_layer.append(nn.Conv2d(planes, planes, 3, 1, 1))
      head_layer.append(nn.ReLU(True))
    head_layer.append(nn.Conv2d(planes, out_planes, 3, 1, 1))

    return nn.Sequential(*head_layer)

  def _normal_init(self, layer):
    """Kaiming-initialise convs in ``layer`` and zero each block's last BN weight."""
    for m in layer.modules():
      if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
          nn.init.constant_(m.bias, 0)
      elif isinstance(m, Bottleneck):
        nn.init.constant_(m.bn3.weight, 0)
      elif isinstance(m, BasicBlock):
        nn.init.constant_(m.bn2.weight, 0)

  def _upsample_add(self, x, y):
    """Bilinearly resize ``x`` to the spatial size of ``y`` and add them."""
    _, _, H, W = y.size()
    # align_corners=False is the library default; stating it avoids the
    # "default upsampling behavior" warning without changing the result.
    return nn.functional.interpolate(x, size=(H, W), mode='bilinear', align_corners=False) + y

  def _upsample_cat(self, x, y):
    """Resize every tensor in ``x`` to ``y``'s size and concat after ``y`` on dim 1."""
    _, _, H, W = y.size()
    layers = [y]
    for sx in x:
      layers.append(nn.functional.interpolate(sx, size=(H, W), mode='bilinear', align_corners=False))
    return torch.cat(layers, 1)

  def train(self, mode=True):
    """Switch training mode and return ``self``, like ``nn.Module.train``."""
    # nn.Module.eval() returns the value of train(False); returning None
    # here made both ``model.train()`` and ``model.eval()`` evaluate to None.
    # (BatchNorm freezing was sketched here previously but is not enabled.)
    return super(GCRefDet, self).train(mode)

  @staticmethod
  def _flatten_pred(pred, last_dim):
    """Reshape a (N, C, H, W) head output to (N, -1, last_dim), channels last."""
    return pred.permute(0, 2, 3, 1).contiguous().view(pred.size(0), -1, last_dim)

  def forward(self,x):
    """Run the detector.

    Returns:
      ``(ar_locs, ar_clss, od_locs, od_clss, anchor)`` where the first four
      are concatenated over the four levels along dim 1 with last dims 4,
      2, 4 and ``num_classes + 1``; ``anchor`` is whatever
      ``anchor_gen.get_anchor_boxes`` returns for the four feature sizes.

    The top-down sums add ``transN(c)`` to an exactly 2x-upsampled coarser
    map, so each level must be exactly twice the size of the next one
    (presumably satisfied when input H and W are multiples of 64 — confirm).
    """
    c1 = self.layer1(x)
    c2 = self.layer2(c1)  # 1/8
    c3 = self.layer3(c2)  # 1/16
    c4 = self.layer4(c3)  # 1/32
    c5 = self.layer5(c4)  # 1/64

    ar_locs = []
    ar_clss = []
    fm_size = []
    for fm, ar_loc_head, ar_cls_head in zip([c2, c3, c4, c5], self.ar_loc_heads, self.ar_cls_heads):
      ar_locs.append(self._flatten_pred(ar_loc_head(fm), 4))
      ar_clss.append(self._flatten_pred(ar_cls_head(fm), 2))
      fm_size.append(fm.size())
    ar_locs = torch.cat(ar_locs, 1)
    ar_clss = torch.cat(ar_clss, 1)
    anchor = self.anchor_gen.get_anchor_boxes(fm_size)

    # Top-down pathway, coarsest level first.
    o4 = self.latent4(self.trans4(c5))
    o3 = self.latent3(self.trans3(c4) + self.up4(o4))
    o2 = self.latent2(self.trans2(c3) + self.up3(o3))
    o1 = self.latent1(self.trans1(c2) + self.up2(o2))

    od_locs = []
    od_clss = []
    for fm, od_loc_head, od_cls_head in zip([o1, o2, o3, o4], self.od_loc_heads, self.od_cls_heads):
      od_locs.append(self._flatten_pred(od_loc_head(fm), 4))
      od_clss.append(self._flatten_pred(od_cls_head(fm), self.num_classes+1))
    od_locs = torch.cat(od_locs, 1)
    od_clss = torch.cat(od_clss, 1)

    return ar_locs, ar_clss, od_locs, od_clss, anchor
  
def testgc():
  """Smoke-test GCRefDet (resnet34 backbone) on one random 576x960 image.

  Prints the per-image size of each prediction tensor, the size of the
  anchor tensor, and a handful of hand-picked anchor boxes for eyeballing.
  """
  # Input is drawn before the model is built, as in the original ordering.
  dummy = torch.randn((1,3, 576, 960))
  net = GCRefDet(6,model='resnet34')
  dummy = dummy.to(device)
  net = net.to(device)
  net.train()

  ar_loc, ar_cls, od_loc, od_cls, anchor = net(dummy)

  # Index 0 selects the single image in the batch.
  for batched in (ar_loc, ar_cls, od_loc, od_cls):
    print(batched[0].size())

  print(anchor.size())
  for idx in (0, 9, 18, 57*9, 57*9+9, 57*9-9):
    print(anchor[idx])

testgc()

Downloading: "https://download.pytorch.org/models/resnet34-333f7ec4.pth" to /root/.torch/models/resnet34-333f7ec4.pth
100%|██████████| 87306240/87306240 [00:01<00:00, 49683284.79it/s]


torch.Size([103275, 4])
torch.Size([103275, 2])
torch.Size([103275, 4])
torch.Size([103275, 7])
torch.Size([103275, 4])
tensor([-28., -28.,  35.,  35.])
tensor([-20., -28.,  43.,  35.])
tensor([-12., -28.,  51.,  35.])
tensor([428., -28., 491.,  35.])
tensor([436., -28., 499.,  35.])
tensor([420., -28., 483.,  35.])
