In [3]:
import torch
# Fetch the pretrained `slow_r50` entry point from the pytorchvideo hub repo,
# put it in evaluation mode, and dump its layer layout for inspection.
model = torch.hub.load('facebookresearch/pytorchvideo', 'slow_r50', pretrained=True)
model.eval()

print(model)

Using cache found in C:\Users\WANGZ/.cache\torch\hub\facebookresearch_pytorchvideo_main


Net(
  (blocks): ModuleList(
    (0): ResNetBasicStem(
      (conv): Conv3d(3, 64, kernel_size=(1, 7, 7), stride=(1, 2, 2), padding=(0, 3, 3), bias=False)
      (norm): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation): ReLU()
      (pool): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=[0, 1, 1], dilation=1, ceil_mode=False)
    )
    (1): ResStage(
      (res_blocks): ModuleList(
        (0): ResBlock(
          (branch1_conv): Conv3d(64, 256, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
          (branch1_norm): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (branch2): BottleneckBlock(
            (conv_a): Conv3d(64, 64, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
            (norm_a): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (act_a): ReLU()
            (conv_b): Conv3d(64, 64, kernel_size=(1, 3, 3), stride=(1, 

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SlowR50_5ch(nn.Module):
    """
    Wrap the pytorchvideo ``slow_r50`` network with a custom input/output size.

    The network is obtained via
    ``torch.hub.load('facebookresearch/pytorchvideo', 'slow_r50', ...)`` and two
    of its layers are replaced:
      - the stem convolution (3 input channels) -> ``in_channels`` inputs
      - the head projection (400 classes)       -> ``num_classes`` outputs

    Args:
        in_channels: number of channels of the input tensor fed to the stem.
        num_classes: number of output units of the new projection layer.
        pretrained: forwarded unchanged to ``torch.hub.load``.
    """
    def __init__(self, in_channels=5, num_classes=2, pretrained=True):
        super().__init__()
        # 1) Load slow_r50 from the hub.
        self.model = torch.hub.load(
            'facebookresearch/pytorchvideo',
            'slow_r50',
            pretrained=pretrained
        )

        # 2) Replace the first convolution.
        #    blocks[0] is the ResNetBasicStem; its `conv` is Conv3d(3, 64, ...).
        stem = self.model.blocks[0]
        old_conv = stem.conv

        # Same geometry as the original conv, only the input width changes.
        new_conv = nn.Conv3d(
            in_channels=in_channels,
            out_channels=old_conv.out_channels,
            kernel_size=old_conv.kernel_size,
            stride=old_conv.stride,
            padding=old_conv.padding,
            bias=(old_conv.bias is not None),
            dilation=old_conv.dilation,
            groups=old_conv.groups
        )

        # Initialise the new conv from the old kernel. All writes are in-place
        # copies so the parameter keeps the shape the layer was declared with
        # (rebinding `new_conv.weight` to the old 3-channel parameter would
        # leave a layer that rejects `in_channels`-channel inputs).
        with torch.no_grad():
            old_width = old_conv.weight.shape[1]
            new_width = new_conv.weight.shape[1]
            shared = min(old_width, new_width)

            # Channels present in both kernels reuse the existing weights.
            new_conv.weight[:, :shared].copy_(old_conv.weight[:, :shared])

            if new_width > old_width:
                # Extra channels start from the mean of the existing channels
                # (broadcast across every additional input channel).
                new_conv.weight[:, shared:] = torch.mean(
                    old_conv.weight, dim=1, keepdim=True
                )

            if old_conv.bias is not None:
                # Bias is per output channel, so it carries over unchanged.
                new_conv.bias.copy_(old_conv.bias)

        # Swap the new conv into the model.
        stem.conv = new_conv

        # 3) Replace the last layer.
        #    blocks[5] is the ResNetBasicHead; its `proj` is Linear(2048, 400).
        head = self.model.blocks[5]
        head.proj = nn.Linear(head.proj.in_features, num_classes)

    def forward(self, x):
        """Run the wrapped network on ``x`` of shape (B, in_channels, T, H, W)."""
        return self.model(x)


In [6]:
# train_5ch_slow_r50.py
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

def train_model(model, dataloader, criterion, optimizer, device='cuda', num_epochs=10,
                save_path="best_slow_r50_5ch.pth"):
    """
    Train ``model`` and checkpoint it whenever the epoch loss improves.

    For each epoch the mean of the per-batch losses is printed; when that mean
    is lower than any seen so far, ``model.state_dict()`` is written to
    ``save_path``.

    Args:
        model: module to optimise; moved to ``device`` before training.
        dataloader: sized iterable yielding ``(inputs, labels)`` batches.
        criterion: callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: optimizer already bound to ``model``'s parameters.
        device: device the model and every batch are moved to.
        num_epochs: number of passes over ``dataloader``.
        save_path: destination file for the best checkpoint.

    Raises:
        ValueError: if ``dataloader`` yields no batches (the average loss
            would otherwise be a division by zero).
    """
    num_batches = len(dataloader)
    if num_batches == 0:
        raise ValueError("dataloader is empty; there is nothing to train on")

    model.to(device)
    best_loss = float('inf')

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Mean over batches (not over samples).
        avg_loss = running_loss / num_batches
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")

        # "Best" is judged on the training loss of this same loader.
        if avg_loss < best_loss:
            best_loss = avg_loss
            torch.save(model.state_dict(), save_path)
            print(f"Model saved. Best loss = {best_loss:.4f}")

if __name__ == "__main__":
    csv_path = "./sample_list.csv"
    # NOTE(review): MultiModal3DDataset is neither defined nor imported in the
    # visible notebook cells; the error recorded under this cell mentions a
    # `dataset_5ch` module -- confirm where the class is supposed to come from.
    dataset = MultiModal3DDataset(csv_path, transform=None, output_shape=(64,64,64))
    dataloader = DataLoader(dataset, batch_size=2, shuffle=True, num_workers=4)

    model = SlowR50_5ch(in_channels=5, num_classes=2, pretrained=True)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    # Use the GPU when one is present, otherwise fall back to the CPU instead
    # of failing on `model.to('cuda')`.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    train_model(model, dataloader, criterion, optimizer, device=device, num_epochs=10)


ModuleNotFoundError: No module named 'dataset_5ch'