## Types of shortcut connections

  - All six types of shortcut connections explored in the paper "**Identity Mappings in Deep Residual Networks**".

In [2]:
## Imports
import torch
import torch.nn as nn

In [3]:
# 1. Original Residual Block (Standard ResNet)
class OriginalResidualBlock(nn.Module):
    """Two-convolution residual block with a plain additive shortcut.

    The residual branch is conv3x3 -> BN -> ReLU -> conv3x3 -> BN.  Its
    result is summed with the shortcut tensor and passed through a final
    ReLU.

    Args:
        in_channels: Channel count of the input tensor.
        out_channels: Channel count produced by both convolutions.
        stride: Stride of the first convolution; the second always uses 1.
        downsample: Optional module that maps the input to the shortcut
            tensor.  When ``None`` the input itself is the shortcut.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        # Settings shared by both 3x3 convolutions; padding=1 keeps the
        # spatial size unchanged at stride 1.
        conv3x3 = dict(kernel_size=3, padding=1, bias=False)
        self.conv1 = nn.Conv2d(in_channels, out_channels, stride=stride, **conv3x3)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, stride=1, **conv3x3)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(branch(x) + shortcut)`` for the input ``x``."""
        shortcut = x if self.downsample is None else self.downsample(x)

        residual = self.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(residual))

        # In-place add onto the branch output; the shortcut is left untouched.
        residual += shortcut
        return self.relu(residual)

# Example usage: run one random batch through the block and print the
# resulting shape.  The tensor is laid out as (batch=1, channels=64, 32, 32)
# to match the 64 input channels the block's first convolution expects.
example_input = torch.randn(1, 64, 32, 32)
# 64 -> 64 channels, default stride of 1, no downsample module.
original_block = OriginalResidualBlock(64, 64)
output = original_block(example_input)
print("Original Residual Block Output Shape:", output.shape)

Original Residual Block Output Shape: torch.Size([1, 64, 32, 32])


In [4]:
# 2. Residual Block with Constant Scaling (alpha=0.5)
class ConstantScalingResidualBlock(nn.Module):
    """Residual block whose shortcut is multiplied by a constant factor.

    The block computes::

        out = relu(residual_scale * F(x) + alpha * shortcut)

    where ``F`` is conv3x3 -> BN -> ReLU -> conv3x3 -> BN.  In the
    constant-scaling experiment of "Identity Mappings in Deep Residual
    Networks" the constant is applied to the *shortcut*; the residual branch
    is either left unscaled or scaled by ``1 - alpha``.

    Args:
        in_channels: Channel count of the input tensor.
        out_channels: Channel count produced by both convolutions.
        stride: Stride of the first convolution; the second always uses 1.
        downsample: Optional module that maps the input to the shortcut
            tensor.  When ``None`` the input itself is the shortcut.
        alpha: Constant multiplier applied to the shortcut.
        residual_scale: Constant multiplier applied to the residual branch.
            The default of 1.0 leaves the branch unscaled.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None,
                 alpha=0.5, residual_scale=1.0):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        self.alpha = alpha
        self.residual_scale = residual_scale

    def forward(self, x):
        """Return ``relu(residual_scale * F(x) + alpha * shortcut)``."""
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)

        # The constant belongs on the shortcut path.  Scaling only the
        # branch output would leave the identity path at weight 1, i.e. an
        # ordinary residual block.
        out = self.residual_scale * out + self.alpha * identity
        out = self.relu(out)
        return out

# Example usage: build a 64 -> 64 channel block with the default alpha and
# run it on example_input, the random tensor created in the first example
# cell, then print the output shape.
constant_scaling_block = ConstantScalingResidualBlock(64, 64)
output = constant_scaling_block(example_input)
print("Constant Scaling Residual Block Output Shape:", output.shape)

Constant Scaling Residual Block Output Shape: torch.Size([1, 64, 32, 32])


In [5]:
# 3. Residual Block with Exclusive Gating
class ExclusiveGatingResidualBlock(nn.Module):
    """Residual block with exclusive (highway-style) gating of both paths.

    A gate ``g = sigmoid(gate_conv(shortcut))`` is produced by a learned 1x1
    convolution.  The residual branch is weighted by ``g`` and the shortcut
    by ``1 - g`` before the two paths are summed::

        out = relu(g * F(x) + (1 - g) * shortcut)

    ``F`` is conv3x3 -> BN -> ReLU -> conv3x3 -> BN.  The gate is computed
    from the shortcut tensor (after ``downsample``, if any) so that it has
    the same shape as the branch output.

    Args:
        in_channels: Channel count of the input tensor.
        out_channels: Channel count produced by both convolutions.
        stride: Stride of the first convolution; the second always uses 1.
        downsample: Optional module that maps the input to the shortcut
            tensor.  When ``None`` the input itself is the shortcut, which
            then must already have ``out_channels`` channels.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        # Learned 1x1 gating function (weights plus bias) on the shortcut.
        # NOTE(review): the paper reports results are sensitive to the
        # initial gate bias; the PyTorch default init is used here.
        self.gate_conv = nn.Conv2d(out_channels, out_channels, kernel_size=1)
        self.gate = nn.Sigmoid()

    def forward(self, x):
        """Return ``relu(g * F(x) + (1 - g) * shortcut)``."""
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)

        gate = self.gate(self.gate_conv(identity))
        # The two gated paths are *added*: multiplying the branch with the
        # shortcut would leave no additive skip path through the block.
        out = gate * out + (1 - gate) * identity
        out = self.relu(out)
        return out

# Example usage: build a 64 -> 64 channel block and run it on example_input,
# the random tensor created in the first example cell, then print the
# output shape.
exclusive_gating_block = ExclusiveGatingResidualBlock(64, 64)
output = exclusive_gating_block(example_input)
print("Exclusive Gating Residual Block Output Shape:", output.shape)

Exclusive Gating Residual Block Output Shape: torch.Size([1, 64, 32, 32])


In [6]:
# 4. Shortcut-Only Gating Residual Block
class ShortcutOnlyGatingResidualBlock(nn.Module):
    """Residual block in which only the shortcut path is gated.

    A gate ``g = sigmoid(gate_conv(shortcut))`` is produced by a learned 1x1
    convolution.  The shortcut is weighted by ``1 - g`` while the residual
    branch is added unscaled::

        out = relu(F(x) + (1 - g) * shortcut)

    ``F`` is conv3x3 -> BN -> ReLU -> conv3x3 -> BN.  The gate is computed
    from the shortcut tensor (after ``downsample``, if any) so that it has
    the same shape as the tensor it scales.

    Args:
        in_channels: Channel count of the input tensor.
        out_channels: Channel count produced by both convolutions.
        stride: Stride of the first convolution; the second always uses 1.
        downsample: Optional module that maps the input to the shortcut
            tensor.  When ``None`` the input itself is the shortcut, which
            then must already have ``out_channels`` channels.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        # Learned 1x1 gating function (weights plus bias) on the shortcut.
        # NOTE(review): the paper reports results are sensitive to the
        # initial gate bias; the PyTorch default init is used here.
        self.gate_conv = nn.Conv2d(out_channels, out_channels, kernel_size=1)
        self.gate = nn.Sigmoid()

    def forward(self, x):
        """Return ``relu(F(x) + (1 - g) * shortcut)``."""
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)

        gate = self.gate(self.gate_conv(identity))
        # Only the shortcut is scaled; the branch output is added as-is so
        # the block keeps an additive skip path.
        out = out + (1 - gate) * identity
        out = self.relu(out)
        return out

# Example usage: build a 64 -> 64 channel block and run it on example_input,
# the random tensor created in the first example cell, then print the
# output shape.
short_cut_only = ShortcutOnlyGatingResidualBlock(64, 64)
output = short_cut_only(example_input)
print("Shortcut-only Scaling Residual Block Output Shape:", output.shape)

Shortcut-only Scaling Residual Block Output Shape: torch.Size([1, 64, 32, 32])


In [7]:
# 5. Residual Block with Convolution Shortcut
class ConvolutionShortcutResidualBlock(nn.Module):
    """Residual block whose shortcut is a bias-free 1x1 convolution.

    The residual branch is conv3x3 -> BN -> ReLU -> conv3x3 -> BN.  The
    shortcut convolution uses the same stride as the first branch
    convolution and maps ``in_channels`` to ``out_channels``.  The two
    results are summed and passed through a final ReLU.

    Args:
        in_channels: Channel count of the input tensor.
        out_channels: Channel count produced by the branch and the shortcut.
        stride: Stride of the first branch convolution and of the shortcut.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        # Settings shared by both 3x3 branch convolutions.
        conv3x3 = dict(kernel_size=3, padding=1, bias=False)
        self.conv1 = nn.Conv2d(in_channels, out_channels, stride=stride, **conv3x3)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, stride=1, **conv3x3)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.shortcut = nn.Conv2d(
            in_channels, out_channels, kernel_size=1, stride=stride, bias=False
        )

    def forward(self, x):
        """Return ``relu(branch(x) + shortcut(x))``."""
        # The projection replaces the identity mapping on the skip path.
        projected = self.shortcut(x)

        residual = self.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(residual))

        residual += projected
        return self.relu(residual)

# Example usage: build a 64 -> 64 channel block and run it on example_input,
# the random tensor created in the first example cell, then print the
# output shape.
conv_only = ConvolutionShortcutResidualBlock(64, 64)
output = conv_only(example_input)
print("Convolution-only Scaling Residual Block Output Shape:", output.shape)

Convolution-only Scaling Residual Block Output Shape: torch.Size([1, 64, 32, 32])


In [8]:
# 6. Residual Block with Dropout Shortcut
class DropoutShortcutResidualBlock(nn.Module):
    """Residual block that applies dropout to the shortcut path.

    The residual branch is conv3x3 -> BN -> ReLU -> conv3x3 -> BN.  The
    shortcut tensor is passed through ``nn.Dropout`` before being added to
    the branch output, and the sum goes through a final ReLU.

    Args:
        in_channels: Channel count of the input tensor.
        out_channels: Channel count produced by both convolutions.
        stride: Stride of the first convolution; the second always uses 1.
        dropout_prob: Drop probability passed to ``nn.Dropout``.
        downsample: Optional module that maps the input to the shortcut
            tensor before dropout.  When ``None`` the input itself is the
            shortcut.  It is needed whenever ``stride`` or the channel
            counts make the branch output differ in shape from the input.
    """

    def __init__(self, in_channels, out_channels, stride=1, dropout_prob=0.5,
                 downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.dropout = nn.Dropout(p=dropout_prob)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(F(x) + dropout(shortcut))``."""
        identity = x
        if self.downsample is not None:
            # Bring the shortcut to the branch's shape before dropping.
            identity = self.downsample(x)
        identity = self.dropout(identity)  # Apply dropout to shortcut connection

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += identity
        out = self.relu(out)
        return out

# Example usage: build a 64 -> 64 channel block with the default dropout
# probability and run it on example_input, the random tensor created in the
# first example cell, then print the output shape.
dropout_scaling = DropoutShortcutResidualBlock(64, 64)
output = dropout_scaling(example_input)
print("Dropout Scaling Residual Block Output Shape:", output.shape)


Dropout Scaling Residual Block Output Shape: torch.Size([1, 64, 32, 32])
