In [57]:
import torch
import torch.nn as nn

class SRCNN(nn.Module):
    def __init__(self):
        super(SRCNN, self).__init__()
        # 第一层卷积，9x9卷积核，输出通道数64
        self.conv1 = nn.Conv2d(1, 64, kernel_size=9, padding=4)
        self.conv2 = nn.Conv2d(64, 32, kernel_size=5, padding=2)
        self.conv3 = nn.Conv2d(32, 1, kernel_size=5, padding=2)

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = self.conv3(x)
        return x

In [58]:
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

class SRDataset(Dataset):
    def __init__(self, lr_dir, hr_dir, transform=None):
        """
        Args:
            lr_dir (str): 低分辨率图像文件夹路径。
            hr_dir (str): 高分辨率图像文件夹路径。
            transform (callable, optional): 图像转换方法。
        """
        self.lr_dir = lr_dir
        self.hr_dir = hr_dir
        self.lr_images = sorted(os.listdir(lr_dir))  # 低分辨率图像列表
        self.hr_images = sorted(os.listdir(hr_dir))  # 高分辨率图像列表
        self.transform = transform  # 数据增强或图像转换

    def __len__(self):
        return len(self.lr_images)  # 返回数据集中图像的数量

    def __getitem__(self, idx):
        # 获取低分辨率和高分辨率图像路径
        lr_image_path = os.path.join(self.lr_dir, self.lr_images[idx])
        hr_image_path = os.path.join(self.hr_dir, self.hr_images[idx])

        # 打开图像
        lr_image = Image.open(lr_image_path).convert("RGB")
        hr_image = Image.open(hr_image_path).convert("RGB")

        print(lr_image.size, hr_image.size) 
        
        # 图像转换（如果有）
        if self.transform:
            lr_image = self.transform(lr_image)
            hr_image = self.transform(hr_image)

        return lr_image, hr_image  # 返回成对的LR和HR图像
    


In [59]:
# 自定义collate_fn以处理不同尺寸的图像
def collate_fn(batch):
    lr_images, hr_images = zip(*batch)
    return list(lr_images), list(hr_images)

# 定义图像转换
transform = transforms.Compose([
    transforms.ToTensor(),  # 将图像转换为Tensor格式
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # 归一化
])

# 初始化数据集
lr_dir = '../data/DIV2K_train_LR_bicubic/X4'
hr_dir = '../data/DIV2K_train_HR'
dataset = SRDataset(lr_dir=lr_dir, hr_dir=hr_dir, transform=transform)

# 使用DataLoader加载数据
batch_size = 16
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)


In [60]:
import torch.optim as optim
import torch.nn as nn

# 初始化模型、损失函数和优化器
model = SRCNN()
criterion = nn.MSELoss()  # 使用MSE损失
optimizer = optim.Adam(model.parameters(), lr=1e-4)
num_epochs = 10  # 训练轮数

# 训练循环
for epoch in range(num_epochs):
    for lr_imgs, hr_imgs in dataloader:
        optimizer.zero_grad()
        
        # 将每张图像输入模型
        sr_imgs = [model(lr_img.unsqueeze(0)) for lr_img in lr_imgs]  # 单个图像输入，添加批次维度
        
        # 计算损失
        loss = sum(criterion(sr_img.squeeze(0), hr_img) for sr_img, hr_img in zip(sr_imgs, hr_imgs)) / len(sr_imgs)
        
        loss.backward()
        optimizer.step()
    
    print(f"Epoch [{epoch}/{num_epochs}], Loss: {loss.item():.4f}")


(510, 396) (2040, 1584)
(510, 339) (2040, 1356)
(510, 339) (2040, 1356)
(510, 339) (2040, 1356)
(510, 450) (2040, 1800)
(339, 510) (1356, 2040)
(510, 339) (2040, 1356)
(510, 342) (2040, 1368)
(339, 510) (1356, 2040)
(510, 339) (2040, 1356)
(510, 339) (2040, 1356)
(510, 339) (2040, 1356)
(510, 339) (2040, 1356)
(510, 384) (2040, 1536)
(339, 510) (1356, 2040)
(510, 339) (2040, 1356)


RuntimeError: stack expects each tensor to be equal size, but got [3, 396, 510] at entry 0 and [3, 339, 510] at entry 1