In [1]:
import math
import torch
import torch.nn as nn

In [2]:
class ConvBlock(nn.Module):
    """Convolution followed by batch normalisation and a SiLU activation.

    Args:
        in_channels: channel count of the incoming feature map.
        out_channels: channel count produced by the convolution.
        kernel_size: kernel size handed to ``nn.Conv2d``.
        stride: stride handed to ``nn.Conv2d``.
        padding: padding handed to ``nn.Conv2d``.
        groups: number of convolution groups (1 = ordinary convolution).
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, groups=1):
        super().__init__()
        # The convolution carries no bias term; it is immediately followed by BatchNorm.
        conv_options = dict(
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            bias=False,
        )
        self.conv = nn.Conv2d(in_channels, out_channels, **conv_options)
        self.bn = nn.BatchNorm2d(out_channels)
        self.silu = nn.SiLU()

    def forward(self, x):
        """Run ``x`` through conv -> batch norm -> SiLU and return the result."""
        convolved = self.conv(x)
        normalised = self.bn(convolved)
        return self.silu(normalised)

In [3]:
class SqueezeExciatation(nn.Module):
    """Channel-wise gating: rescales every channel of the input by a learned weight.

    The spatial dimensions are pooled down to 1x1, squeezed to ``reduced_dim``
    channels, expanded back to ``in_channels`` and passed through a sigmoid,
    giving one multiplier per channel.

    Args:
        in_channels: channel count of the input (and of the output).
        reduced_dim: channel count of the bottleneck between the two 1x1 convs.
    """

    def __init__(self, in_channels, reduced_dim):
        super().__init__()
        # Comparable to an attention mechanism: yields one "attention score" per channel.
        gate_layers = [
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_channels, reduced_dim, kernel_size=1),
            nn.SiLU(),
            nn.Conv2d(reduced_dim, in_channels, kernel_size=1),
            nn.Sigmoid(),
        ]
        self.se = nn.Sequential(*gate_layers)

    def forward(self, x):
        """Return ``x`` multiplied by its per-channel gate values."""
        channel_gates = self.se(x)
        return x * channel_gates

In [4]:
class InvertedResidualBlock(nn.Module):
    """Inverted residual (MBConv-style) block with squeeze-excitation.

    Pipeline: optional expansion ConvBlock -> depthwise ConvBlock ->
    squeeze-excitation -> 1x1 projection conv + BatchNorm. When the input and
    output shapes match (``in_channels == out_channels`` and ``stride == 1``)
    the input is added back as a skip connection, with stochastic depth
    applied to the residual branch during training.

    Args:
        in_channels: channel count of the input.
        out_channels: channel count of the output.
        kernel_size: kernel size of the depthwise convolution.
        stride: stride of the depthwise convolution.
        padding: padding of the depthwise convolution.
        expand_ratio: multiplier applied to ``in_channels`` to get the hidden width.
        reduction: divisor applied to ``in_channels`` to get the SE bottleneck width.
        survival_prob: probability of keeping a sample's residual branch during
            training; must lie in (0, 1].

    Raises:
        ValueError: if ``survival_prob`` is not in (0, 1].
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding,
                 expand_ratio, reduction=4, survival_prob=0.8):
        super().__init__()
        # A non-positive value would divide by zero in stochastic_depth (NaN/inf
        # activations); a value above 1 would rescale the branch in training only.
        if not 0 < survival_prob <= 1:
            raise ValueError(
                f"survival_prob must be in (0, 1], got {survival_prob!r}"
            )
        self.survival_prob = survival_prob
        hidden_dim = in_channels * expand_ratio
        # The expansion conv is skipped when it would not change the width.
        self.expand = in_channels != hidden_dim
        # Clamp to 1 so a narrow input (in_channels < reduction) never produces
        # a zero-channel squeeze-excitation bottleneck.
        reduced_dim = max(1, int(in_channels / reduction))
        self.add_skip = in_channels == out_channels and stride == 1

        if self.expand:
            # NOTE(review): the expansion uses a 3x3 kernel here, whereas the usual
            # MBConv design uses a 1x1 pointwise expansion. Left unchanged so that
            # parameter shapes stay as they are -- confirm whether this is intended.
            self.expand_conv = ConvBlock(in_channels, hidden_dim, kernel_size=3, stride=1, padding=1)

        self.conv = nn.Sequential(
            # groups == channels makes this a depthwise convolution
            ConvBlock(hidden_dim, hidden_dim, kernel_size=kernel_size, stride=stride, padding=padding, groups=hidden_dim),
            SqueezeExciatation(hidden_dim, reduced_dim),
            # linear 1x1 projection down to out_channels (no activation afterwards)
            nn.Conv2d(hidden_dim, out_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channels)
        )

    def stochastic_depth(self, x):
        """Randomly drop whole samples of ``x`` during training.

        In eval mode ``x`` is returned untouched. In training mode every sample
        in the batch is kept with probability ``survival_prob`` (and zeroed
        otherwise); kept samples are scaled by ``1 / survival_prob``.
        """
        if not self.training:
            return x

        # One boolean keep/drop decision per sample, broadcast over C, H, W.
        keep_mask = torch.rand(x.shape[0], 1, 1, 1, device=x.device) < self.survival_prob
        return (x / self.survival_prob) * keep_mask

    def forward(self, inp):
        """Apply the block to ``inp``, adding the skip connection when shapes allow."""
        x = self.expand_conv(inp) if self.expand else inp
        if self.add_skip:
            return self.stochastic_depth(self.conv(x)) + inp
        return self.conv(x)

In [5]:
class EfficientNet(nn.Module):
    """EfficientNet classifier built from a stem, stacked inverted residual blocks and a head.

    Args:
        version: key into ``version_values`` ("b0" ... "b7") selecting the scaling setup.
        num_classes: size of the final linear layer's output.
    """

    def __init__(self, version, num_classes):
        super().__init__()
        # version -> (phi_value, resolution, drop_prob)
        self.version_values = dict(
            b0=(0, 224, 0.2),
            b1=(0.5, 240, 0.2),
            b2=(1, 260, 0.3),
            b3=(2, 300, 0.3),
            b4=(3, 380, 0.4),
            b5=(4, 456, 0.4),
            b6=(5, 528, 0.5),
            b7=(6, 600, 0.5),
        )

        # One row per stage: expand_ratio, channels, repeats, stride, kernel_size
        self.base_model = [
            [1, 16, 1, 1, 3],
            [6, 24, 2, 2, 3],
            [6, 40, 2, 2, 5],
            [6, 80, 3, 2, 3],
            [6, 112, 3, 1, 5],
            [6, 192, 4, 2, 5],
            [6, 320, 1, 1, 3],
        ]

        width_factor, depth_factor, drop_prob = self.get_factors(version)
        last_channels = math.ceil(1280 * width_factor)

        self.mb_convs = self.get_mb_convs(width_factor, depth_factor, last_channels)
        self.pool = nn.AdaptiveAvgPool2d(1)
        classifier_layers = [
            nn.Dropout(drop_prob),
            nn.Linear(last_channels, num_classes),
        ]
        self.final = nn.Sequential(*classifier_layers)

    def get_factors(self, version, alpha=1.2, beta=1.1):
        """Return ``(width_factor, depth_factor, drop_prob)`` for ``version``.

        The depth factor is ``alpha ** phi`` and the width factor is
        ``beta ** phi``, with ``phi`` and ``drop_prob`` read from
        ``version_values``. The stored resolution is not used here.
        """
        phi, _resolution, drop_prob = self.version_values[version]
        return beta ** phi, alpha ** phi, drop_prob

    def get_mb_convs(self, width_factor, depth_factor, last_channels):
        """Assemble the stem, every inverted residual stage and the 1x1 head conv.

        Returns an ``nn.Sequential`` whose final layer outputs ``last_channels`` channels.
        """
        stem_channels = int(32 * width_factor)
        layers = [ConvBlock(3, stem_channels, kernel_size=3, stride=2, padding=1)]
        prev_channels = stem_channels

        for expand_ratio, base_channels, base_repeats, stage_stride, kernel_size in self.base_model:
            # Scale the width, then round up to a multiple of 4.
            scaled_channels = int(base_channels * width_factor)
            stage_channels = 4 * math.ceil(scaled_channels / 4)
            num_blocks = math.ceil(base_repeats * depth_factor)

            for block_idx in range(num_blocks):
                # Only the first block of a stage applies the stage stride.
                block_stride = 1 if block_idx else stage_stride
                block = InvertedResidualBlock(
                    prev_channels,
                    stage_channels,
                    kernel_size=kernel_size,
                    stride=block_stride,
                    padding=kernel_size // 2,
                    expand_ratio=expand_ratio,
                )
                layers.append(block)
                prev_channels = stage_channels

        head = ConvBlock(prev_channels, last_channels, kernel_size=1, stride=1, padding=0)
        layers.append(head)
        return nn.Sequential(*layers)

    def forward(self, x):
        """Extract features, global-average-pool, flatten per sample and classify."""
        features = self.mb_convs(x)
        pooled = self.pool(features)
        flat = pooled.view(pooled.shape[0], -1)
        return self.final(flat)

In [6]:
# Smoke test: push one random 3x224x224 image through a freshly built B0 model
# and check that the logits come out with shape (batch, num_classes).
inp = torch.randn((1, 3, 224, 224))
net = EfficientNet(version="b0", num_classes=10)
out = net(inp)
print(out.size())

torch.Size([1, 10])
