# **Feature Pyramids**
* This notebook follows the video "Feature Pyramid Network | Neck | Essentials of Object Detection".<br>Reference: https://www.youtube.com/watch?v=FKsgO0U7CUw&list=PLivJwLo9VCUJXdO8SiOjZTWr_fXrAy4OQ&index=10
* Extended by **Vigyannveshi** 

In [1]:
from __future__ import annotations
from functools import partial

import torch as tr
import torch.nn as nn
import torch.nn.functional as F
from torchvision.ops.misc import Conv2dNormActivation

from typing import NamedTuple

In [5]:
# Random stand-ins for three feature maps: as spatial size doubles
# (13 -> 26 -> 52) the channel count halves (512 -> 256 -> 128).
hl_fm = tr.randn(1, 512, 13, 13)
ml_fm = tr.randn(1, 256, 26, 26)
ll_fm = tr.randn(1, 128, 52, 52)

In [6]:
# Factory for an in-place LeakyReLU with a slope of 0.1 on negative inputs.
LeakyRelu_Inplace = partial(nn.LeakyReLU, negative_slope=0.1, inplace=True)

# Factory for a 1x1 Conv2dNormActivation block using the activation above;
# callers still supply in_channels and out_channels.
ConvBlockReduceChannels = partial(
    Conv2dNormActivation,
    kernel_size=1,
    activation_layer=LeakyRelu_Inplace,
)

In [7]:
# One 1x1 reducer per level (high, mid, low), each mapping its input channel
# count down to a shared 64 channels.
conv_hl_reduce, conv_ml_reduce, conv_ll_reduce = (
    ConvBlockReduceChannels(in_channels=width, out_channels=64)
    for width in (512, 256, 128)
)

In [8]:
# Run each feature map through its own channel reducer.
hl_fm_r, ml_fm_r, ll_fm_r = (
    conv_hl_reduce(hl_fm),
    conv_ml_reduce(ml_fm),
    conv_ll_reduce(ll_fm),
)

# Report the resulting shapes.
print(f"New HL shape - {hl_fm_r.shape}")
print(f"New ML shape - {ml_fm_r.shape}")
print(f"New LL shape - {ll_fm_r.shape}")

New HL shape - torch.Size([1, 64, 13, 13])
New ML shape - torch.Size([1, 64, 26, 26])
New LL shape - torch.Size([1, 64, 52, 52])


In [9]:
# Nearest-neighbour upsampling by a factor of 2 for the reduced high-level map.
hl_upsampler = nn.Upsample(mode="nearest", scale_factor=2)
hl_fm_r_upsampled = hl_upsampler(hl_fm_r)

# Display the upsampled shape.
hl_fm_r_upsampled.size()

torch.Size([1, 64, 26, 26])

In [11]:
# Fuse the upsampled high-level map with the mid-level map by element-wise sum.
hl_ml_fused = hl_fm_r_upsampled + ml_fm_r

# Display the fused shape.
hl_ml_fused.size()

torch.Size([1, 64, 26, 26])

In [13]:
# Factory for a 3x3, 64 -> 64 channel Conv2dNormActivation block that is
# applied to a fused map.
ConvSmoother = partial(
    Conv2dNormActivation,
    activation_layer=LeakyRelu_Inplace,
    kernel_size=3,
    out_channels=64,
    in_channels=64,
)

# Build one smoother and run the fused high/mid map through it.
hl_ml_fused_smoother = ConvSmoother()
smooth_hl_ml_fused = hl_ml_fused_smoother(hl_ml_fused)

print(smooth_hl_ml_fused.shape)

torch.Size([1, 64, 26, 26])


In [16]:
class FakeBackboneResult(NamedTuple):
  """Three feature maps returned by ``FakeBackbone``, coarsest first."""

  hl_features: tr.Tensor  # high-level map: 512 channels at stride 32
  ml_features: tr.Tensor  # mid-level map: 256 channels at stride 16
  ll_features: tr.Tensor  # low-level map: 128 channels at stride 8


class FakeBackbone(nn.Module):
  """Stand-in backbone that returns random feature maps shaped like real ones.

  No computation is performed on the input. Only its batch size, spatial
  size and device are read, so the outputs line up with whatever image
  batch is passed in. A ``(1, 3, 416, 416)`` input yields 13x13, 26x26 and
  52x52 maps.
  """

  # (channels, stride) for the high-, mid- and low-level maps, in that order.
  _LEVELS = ((512, 32), (256, 16), (128, 8))

  def __init__(self):
    super().__init__()

  def forward(self, x: tr.Tensor) -> FakeBackboneResult:
    """Return random feature maps sized relative to ``x``.

    Args:
      x: Input batch; expected to be laid out as ``(N, C, H, W)``.

    Returns:
      A ``FakeBackboneResult`` whose maps have batch size ``N`` and spatial
      size ``H // stride`` by ``W // stride`` for strides 32, 16 and 8.
    """
    batch, height, width = x.size(0), x.size(-2), x.size(-1)

    # Drawn in high -> mid -> low order so the random stream is consumed in
    # the same sequence as three consecutive randn calls.
    hl_fm, ml_fm, ll_fm = (
        tr.randn(
            batch,
            channels,
            height // stride,
            width // stride,
            device=x.device,
        )
        for channels, stride in self._LEVELS
    )

    return FakeBackboneResult(
        hl_features=hl_fm,
        ml_features=ml_fm,
        ll_features=ll_fm,
    )

In [17]:
class FPNNeck(nn.Module):
  """Top-down feature-pyramid neck over three backbone levels.

  Every input map is first projected to ``out_channels`` by a 1x1 conv
  block. The high-level projection is resized to the mid-level resolution,
  added to the mid-level projection and passed through a 3x3 conv block.
  That smoothed result is then resized to the low-level resolution, added
  to the low-level projection and smoothed the same way.

  Args:
    in_channels: Channel counts of the high-, mid- and low-level inputs, in
      that order (indices 0, 1 and 2 are read).
    out_channels: Channel count shared by all three outputs.
  """

  def __init__(self, in_channels: list[int], out_channels: int):
    super().__init__()

    LeakyRelu_Inplace = partial(
        nn.LeakyReLU,
        negative_slope=0.1,
        inplace=True,
    )

    ConvBlock = partial(Conv2dNormActivation, activation_layer=LeakyRelu_Inplace)

    # 1x1 blocks that bring every level to the same channel count so the
    # maps can be summed.
    self.hl_channel_reducer = ConvBlock(
        in_channels=in_channels[0],
        out_channels=out_channels,
        kernel_size=1,
    )
    self.ml_channel_reducer = ConvBlock(
        in_channels=in_channels[1],
        out_channels=out_channels,
        kernel_size=1,
    )
    self.ll_channel_reducer = ConvBlock(
        in_channels=in_channels[2],
        out_channels=out_channels,
        kernel_size=1,
    )

    # 3x3 blocks applied after each fusion. The kernel size is spelled out
    # rather than relying on the Conv2dNormActivation default.
    self.ml_smoother = ConvBlock(
        in_channels=out_channels,
        out_channels=out_channels,
        kernel_size=3,
    )
    self.ll_smoother = ConvBlock(
        in_channels=out_channels,
        out_channels=out_channels,
        kernel_size=3,
    )

  @staticmethod
  def _upsample_and_add(coarse: tr.Tensor, fine: tr.Tensor) -> tr.Tensor:
    """Resize ``coarse`` to ``fine``'s last two dims and add them element-wise."""
    # An explicit target size (not a scale factor) keeps the sum valid even
    # when the finer map is not exactly twice the coarser one.
    return F.interpolate(coarse, size=fine.shape[-2:], mode="nearest") + fine

  def forward(
      self,
      hl_features: tr.Tensor,
      ml_features: tr.Tensor,
      ll_features: tr.Tensor,
  ) -> list[tr.Tensor]:
    """Fuse the three levels top-down.

    Args:
      hl_features: High-level (coarsest) feature map.
      ml_features: Mid-level feature map.
      ll_features: Low-level (finest) feature map.

    Returns:
      ``[high, mid, low]``: the reduced high-level map, the smoothed
      high+mid fusion, and the smoothed mid+low fusion.
    """
    hl_channel_r = self.hl_channel_reducer(hl_features)
    ml_channel_r = self.ml_channel_reducer(ml_features)
    ll_channel_r = self.ll_channel_reducer(ll_features)

    smoothed_ml_features = self.ml_smoother(
        self._upsample_and_add(hl_channel_r, ml_channel_r)
    )
    # The already-smoothed mid-level map is what flows down to the low level.
    smoothed_ll_features = self.ll_smoother(
        self._upsample_and_add(smoothed_ml_features, ll_channel_r)
    )

    return [hl_channel_r, smoothed_ml_features, smoothed_ll_features]


In [18]:
# Build the neck for 512/256/128-channel inputs and a 64-channel output,
# plus the stand-in backbone that feeds it.
neck = FPNNeck(out_channels=64, in_channels=[512, 256, 128])
backbone = FakeBackbone()

dummy_image = tr.randn(1, 3, 416, 416)
backbone_output = backbone(dummy_image)

# The result's fields are ordered (hl, ml, ll), matching the neck's
# positional parameters, so it can be unpacked straight into the call.
enriched_hl_features, enriched_ml_features, enriched_ll_features = neck(
    *backbone_output
)

In [19]:
# Display the shape of the high-level output.
enriched_hl_features.size()

torch.Size([1, 64, 13, 13])

In [20]:
# Display the shape of the mid-level output.
enriched_ml_features.size()

torch.Size([1, 64, 26, 26])

In [21]:
# Display the shape of the low-level output.
enriched_ll_features.size()

torch.Size([1, 64, 52, 52])