In [1]:
import torch
import torch.nn as nn

# SE注意力

In [None]:
class SEAttention(nn.Module):
    """Squeeze-and-Excitation channel attention.

    Each channel is global-average-pooled to a single value, the resulting
    channel vector is passed through a two-layer bottleneck MLP that ends in
    a sigmoid, and the input is rescaled channel-wise by those weights.

    Args:
        in_channels: Number of channels ``c`` of the input feature map.
        ratio: Bottleneck reduction factor; the hidden layer has
            ``in_channels // ratio`` units.
    """

    def __init__(self, in_channels, ratio=16):
        # Zero-argument super(): the previous code passed an undefined name
        # (`se_block`) here, which raised NameError on construction.
        super().__init__()
        # Squeeze: [b, c, h, w] -> [b, c, 1, 1]
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        # Excitation: c -> c // ratio -> c, squashed into (0, 1)
        self.fc = nn.Sequential(
            nn.Linear(in_channels, in_channels // ratio, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(in_channels // ratio, in_channels, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Rescale the channels of ``x``.

        Args:
            x: Tensor of shape ``[b, c, h, w]``.

        Returns:
            Tensor of shape ``[b, c, h, w]``: ``x`` multiplied by the
            per-channel attention weights.
        """
        b, c, _, _ = x.size()
        # [b, c, h, w] -> [b, c, 1, 1] -> [b, c]
        y = self.avg_pool(x).view(b, c)
        # [b, c] -> [b, c] (through the bottleneck) -> [b, c, 1, 1]
        y = self.fc(y).view(b, c, 1, 1)
        # Broadcast the [b, c, 1, 1] weights over the spatial dimensions.
        return x * y

# CBAM注意力

## 通道注意力

In [2]:
class ChannelAttention(nn.Module):
    """Channel attention branch of CBAM.

    Produces a per-channel weight map of shape ``[b, in_channels, 1, 1]`` by
    feeding both the average-pooled and the max-pooled channel descriptors
    through one shared bottleneck, summing the two results and applying a
    sigmoid.

    Args:
        in_channels: Number of channels of the input feature map.
        ratio: Bottleneck reduction factor; the hidden layer has
            ``in_channels // ratio`` channels.
    """

    def __init__(self, in_channels, ratio=8):
        super().__init__()
        hidden_channels = in_channels // ratio

        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        # 1x1 convolutions are used in place of fully connected layers.
        self.fc1 = nn.Conv2d(in_channels, hidden_channels, kernel_size=1, bias=False)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Conv2d(hidden_channels, in_channels, kernel_size=1, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Compute channel attention weights.

        Args:
            x: Tensor of shape ``[b, in_channels, h, w]``.

        Returns:
            Tensor of shape ``[b, in_channels, 1, 1]`` with values in (0, 1).
        """

        def shared_mlp(pooled):
            # [b, in_channels, 1, 1] -> [b, in_channels // ratio, 1, 1]
            #                        -> [b, in_channels, 1, 1]
            return self.fc2(self.relu1(self.fc1(pooled)))

        # Both pooled descriptors go through the same weights.
        avg_branch = shared_mlp(self.avg_pool(x))
        max_branch = shared_mlp(self.max_pool(x))
        return self.sigmoid(avg_branch + max_branch)

## 空间注意力

In [3]:
class SpatialAttention(nn.Module):
    """Spatial attention branch of CBAM.

    Collapses the channel dimension with a mean and a max, stacks the two
    resulting maps, and convolves them down to a single-channel weight map
    passed through a sigmoid.

    Args:
        kernel_size: Size of the convolution kernel; must be 3 or 7.
    """

    def __init__(self, kernel_size=7):
        super().__init__()
        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
        # Padding that keeps the spatial size unchanged for each allowed kernel.
        same_padding = {3: 1, 7: 3}[kernel_size]
        self.conv = nn.Conv2d(2, 1, kernel_size, padding=same_padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Compute spatial attention weights.

        Args:
            x: Tensor of shape ``[b, c, h, w]``.

        Returns:
            Tensor of shape ``[b, 1, h, w]`` with values in (0, 1).
        """
        # Channel-wise statistics, each [b, 1, h, w].
        channel_mean = x.mean(dim=1, keepdim=True)
        channel_max = x.max(dim=1, keepdim=True).values
        # Stack to [b, 2, h, w], then convolve down to [b, 1, h, w].
        stacked = torch.cat((channel_mean, channel_max), dim=1)
        return self.sigmoid(self.conv(stacked))

## CBAM模块

In [4]:
class CBAM(nn.Module):
    """CBAM block: channel attention followed by spatial attention.

    Args:
        in_channels: Number of channels of the input feature map.
        ratio: Reduction factor forwarded to ``ChannelAttention``.
        kernel_size: Kernel size forwarded to ``SpatialAttention``.
    """

    def __init__(self, in_channels, ratio=8, kernel_size=7):
        super().__init__()
        self.ca = ChannelAttention(in_channels, ratio=ratio)
        self.sa = SpatialAttention(kernel_size=kernel_size)

    def forward(self, x):
        """Apply both attention stages to ``x``.

        Args:
            x: Tensor of shape ``[b, in_channels, h, w]``.

        Returns:
            Tensor of shape ``[b, in_channels, h, w]``.
        """
        # Channel stage: [b, in_channels, h, w] * [b, in_channels, 1, 1]
        channel_refined = x * self.ca(x)
        # Spatial stage works on the channel-refined features:
        # [b, in_channels, h, w] * [b, 1, h, w]
        spatial_weights = self.sa(channel_refined)
        return channel_refined * spatial_weights