In [1]:
import torch
import torch.nn as nn

In [None]:
class block(nn.Module):
  """Bottleneck residual block (1x1 -> 3x3 -> 1x1 convolutions plus a shortcut).

  Each convolution is followed by batch normalisation. The output of the main
  branch is added to the shortcut branch and passed through a final ReLU.

  Attributes
  ----------
  expansion: int
      Ratio between the number of channels the block outputs and
      ``out_channels``. Declared on the class so that code building a network
      can read ``block.expansion`` without creating an instance first.
  """

  # Output channels of the last 1x1 convolution = out_channels * expansion.
  expansion = 4

  def __init__(self,in_channels,out_channels,identity_downsample=None,stride=1):
    """
    Parameters
    ----------
    in_channels: int
        Number of channels of the incoming feature map.

    out_channels: int
        Width of the two inner convolutions; the block itself outputs
        ``out_channels * expansion`` channels.

    identity_downsample: nn.Module or None
        Module applied to the shortcut branch so that it matches the main
        branch in shape. Needed whenever ``stride != 1`` or
        ``in_channels != out_channels * expansion``; with ``None`` the input
        is added unchanged.

    stride: int, default 1
        Stride of the 3x3 convolution. ``1`` keeps the spatial size,
        ``2`` halves it.
    """
    super().__init__()

    expanded_channels = out_channels * self.expansion

    # NOTE(review): the convolutions keep the default bias although each one is
    # followed directly by BatchNorm; left as is so state_dict keys stay the same.

    # 1x1 convolution: reduces the channel count, spatial size untouched.
    self.conv1 = nn.Conv2d(in_channels,out_channels,kernel_size=1,stride=1,padding=0)
    self.bn1 = nn.BatchNorm2d(out_channels)

    # 3x3 convolution: the only layer of the main branch that applies `stride`.
    self.conv2 = nn.Conv2d(out_channels,out_channels,kernel_size=3,stride=stride,padding=1)
    self.bn2 = nn.BatchNorm2d(out_channels)

    # 1x1 convolution: widens the channels to out_channels * expansion.
    self.conv3 = nn.Conv2d(out_channels,expanded_channels,kernel_size=1,stride=1,padding=0)
    self.bn3 = nn.BatchNorm2d(expanded_channels)

    # One ReLU module is shared by all three activation points.
    self.relu = nn.ReLU()

    # Optional transform for the shortcut branch (see the parameter description).
    self.identity_downsample = identity_downsample

  def forward(self, x):
    """Return ``relu(F(x) + shortcut(x))``.

    ``F`` is conv1 -> bn1 -> relu -> conv2 -> bn2 -> relu -> conv3 -> bn3 and
    ``shortcut`` is ``identity_downsample`` when one was given, otherwise the
    input itself.
    """
    identity = x  # keep the input for the shortcut branch

    # --- main branch ---
    out = self.relu(self.bn1(self.conv1(x)))    # 1x1 reduce
    out = self.relu(self.bn2(self.conv2(out)))  # 3x3, possibly strided
    out = self.bn3(self.conv3(out))             # 1x1 expand, no activation yet

    # --- shortcut branch ---
    if self.identity_downsample is not None:
      identity = self.identity_downsample(identity)

    # --- residual sum ---
    # In-place add on the freshly computed bn3 output; the caller's tensor is not modified.
    out += identity
    return self.relu(out)



In [None]:
class ResNet(nn.Module):  # e.g. layers = [3, 4, 6, 3]: number of blocks in each of the four stages
  """ResNet backbone plus classifier assembled from a residual block class.

  Parameters
  ----------
  block: type
      Residual block class. It is called as
      ``block(in_channels, out_channels, identity_downsample, stride)`` and
      must output ``out_channels * expansion`` channels. The expansion factor
      is read from ``block.expansion``; when the class does not define that
      attribute, 4 is assumed.

  layers: sequence of int
      Number of blocks in each of the four stages (the first four entries are used).

  image_channels: int
      Number of channels of the input images.

  num_classes: int
      Size of the output of the final fully connected layer.
  """
  def __init__(self,block,layers,image_channels,num_classes):
    super().__init__()
    # Channel multiplier of the block; replaces the previously hard-coded 4.
    self.expansion = getattr(block,"expansion",4)
    # Channel count entering the next block; updated by _make_layer.
    self.in_channels = 64

    # Stem: 7x7 stride-2 convolution to 64 channels, then 3x3 stride-2 max pooling.
    self.conv1 = nn.Conv2d(image_channels,64,kernel_size=7,stride=2,padding=3)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU()
    self.maxpool = nn.MaxPool2d(kernel_size=3,stride=2,padding=1)

    # Four residual stages; every stage after the first starts with a stride-2 block.
    self.layer1 = self._make_layer(block,layers[0],out_channels=64,stride=1)
    self.layer2 = self._make_layer(block,layers[1],out_channels=128,stride=2)
    self.layer3 = self._make_layer(block,layers[2],out_channels=256,stride=2)
    self.layer4 = self._make_layer(block,layers[3],out_channels=512,stride=2)

    # Global average pooling down to 1x1.
    self.avgpool = nn.AdaptiveAvgPool2d((1,1))

    # The last stage outputs 512 * expansion channels.
    self.fc = nn.Linear(512*self.expansion,num_classes)

  def forward(self,x):
    """Run stem, the four stages, pooling and the classifier; returns ``(batch, num_classes)``."""
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.maxpool(x)

    # The four residual stages in order.
    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.layer4(x)

    x = self.avgpool(x)
    x = x.reshape(x.shape[0],-1)  # flatten everything except the batch dimension
    x = self.fc(x)

    return x


  def _make_layer(self,block,num_residual_blocks,out_channels,stride):
    """Build one stage as an ``nn.Sequential`` of residual blocks.

    block: residual block class
    num_residual_blocks: number of blocks in this stage
    out_channels: inner width of each block (the block outputs out_channels * expansion)
    stride: stride of the first block; 2 halves the spatial size

    Side effect: ``self.in_channels`` is set to the channel count this stage outputs.
    """
    expanded_channels = out_channels * self.expansion
    identity_downsample = None

    # When the first block changes the spatial size or the channel count, the
    # shortcut needs a 1x1 projection so that F(x) + identity have equal shapes.
    if stride != 1 or self.in_channels != expanded_channels:
      identity_downsample = nn.Sequential(
        nn.Conv2d(self.in_channels,expanded_channels,kernel_size=1,stride=stride),
        nn.BatchNorm2d(expanded_channels),
      )

    # First block: may downsample and/or change the channel count.
    layers = [block(self.in_channels,out_channels,identity_downsample,stride)]
    # From here on the blocks receive the expanded channel count.
    self.in_channels = expanded_channels

    # Remaining blocks: stride 1 and no projection, shapes stay the same.
    for _ in range(num_residual_blocks - 1):
      layers.append(block(self.in_channels,out_channels))

    return nn.Sequential(*layers)

In [4]:
def ResNet50(img_channels=3,num_classes=1000):
  """Build a ResNet from `block` with stage depths [3, 4, 6, 3]."""
  stage_depths = [3,4,6,3]
  return ResNet(block,stage_depths,image_channels=img_channels,num_classes=num_classes)

def ResNet101(img_channels=3,num_classes=1000):
  """Build a ResNet from `block` with stage depths [3, 4, 23, 3]."""
  stage_depths = [3,4,23,3]
  return ResNet(block,stage_depths,image_channels=img_channels,num_classes=num_classes)

def ResNet152(img_channels=3,num_classes=1000):
  """Build a ResNet from `block` with stage depths [3, 8, 36, 3]."""
  stage_depths = [3,8,36,3]
  return ResNet(block,stage_depths,image_channels=img_channels,num_classes=num_classes)

In [11]:
def test():
  """Smoke test: push a random batch through ResNet50 and print the output shape.

  The model and the input are placed on the GPU when CUDA is available and on
  the CPU otherwise, so the check also runs on machines without a GPU.
  """
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  net = ResNet50().to(device)
  # Batch of 2 random 3-channel 224x224 images, created directly on the target device.
  x = torch.randn(2,3,224,224,device=device)
  # Only the shape is inspected, so no autograd graph is needed.
  with torch.no_grad():
    y = net(x)
  assert y.shape == (2,1000), y.shape
  print(y.shape)


# Run the smoke test; the recorded cell output is torch.Size([2, 1000])
# (batch of 2, 1000 classes).
test()

torch.Size([2, 1000])
