In [1]:
import os
import cv2
import numpy as np
import torch
from natsort import natsorted
from torchvision.transforms import functional as F
from torch import nn
import math
from skimage.metrics import structural_similarity
from torchvision import transforms
from PIL import Image


class SRCNN(nn.Module):
    def __init__(self) -> None:
        super(SRCNN, self).__init__()

        # 定义模型的卷积层
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, (9, 9), (1, 1), (4, 4)),  # 输入通道数为1，输出通道数为64，卷积核大小为9x9，步长为1，填充为4
            nn.ReLU(True)  # ReLU激活函数
        )

        self.map = nn.Sequential(
            nn.Conv2d(64, 32, (5, 5), (1, 1), (2, 2)),  # 输入通道数为64，输出通道数为32，卷积核大小为5x5，步长为1，填充为2
            nn.ReLU(True)  # ReLU激活函数
        )

        self.reconstruction = nn.Conv2d(32, 1, (5, 5), (1, 1), (2, 2))  # 输入通道数为32，输出通道数为1，卷积核大小为5x5，步长为1，填充为2

        self._initialize_weights()  # 初始化网络权重

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self._forward_impl(x)  # 实现前向传播

    def _forward_impl(self, x: torch.Tensor) -> torch.Tensor:
        out = self.features(x)  # 卷积层特征提取
        out = self.map(out)  # 映射层特征提取
        out = self.reconstruction(out)  # 重构层进行图像重建

        return out

    def _initialize_weights(self) -> None:
        # 初始化所有卷积层的权重和偏置
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.normal_(module.weight.data, 0.0,
                                math.sqrt(2 / (module.out_channels * module.weight.data[0][0].numel())))
                nn.init.zeros_(module.bias.data)

        # 初始化重构层的权重和偏置
        nn.init.normal_(self.reconstruction.weight.data, 0.0, 0.001)
        nn.init.zeros_(self.reconstruction.bias.data)

# 将BGR格式的张量转换为YCbCr格式的张量.如果only_use_y_channel为True，则仅使用Y通道；否则，使用Y、Cb、Cr三个通道。该函数返回一个PyTorch张量。
def bgr2ycbcr_torch(tensor: torch.Tensor, only_use_y_channel: bool) -> torch.Tensor:
    if only_use_y_channel:
        weight = torch.Tensor([[24.966], [128.553], [65.481]]).to(tensor)
        tensor = torch.matmul(tensor.permute(0, 2, 3, 1), weight).permute(0, 3, 1, 2) + 16.0
    else:
        weight = torch.Tensor([[24.966, 112.0, -18.214],
                               [128.553, -74.203, -93.786],
                               [65.481, -37.797, 112.0]]).to(tensor)
        bias = torch.Tensor([16, 128, 128]).view(1, 3, 1, 1).to(tensor)
        tensor = torch.matmul(tensor.permute(0, 2, 3, 1), weight).permute(0, 3, 1, 2) + bias
    tensor /= 255.
    return tensor

# 将YCbCr格式的NumPy数组转换为RGB格式的NumPy数组。
def ycbcr2rgb(image: np.ndarray) -> np.ndarray:
    image_dtype = image.dtype
    image *= 255.
    image = np.matmul(image, [[0.00456621, 0.00456621, 0.00456621],
                              [0, -0.00153632, 0.00791071],
                              [0.00625893, -0.00318811, 0]]) * 255.0 + [-222.921, 135.576, -276.836]

    image /= 255.
    image = image.astype(image_dtype)
    return image

# 将NumPy数组转换为PyTorch张量。如果range_norm为True，则将像素值从[0,1]范围转换为[-1,1]范围；如果half为True，则将数据类型从torch.float32转换为torch.half。
def image2tensor(image: np.ndarray, range_norm: bool, half: bool) -> torch.Tensor:
    tensor = F.to_tensor(image)
    # Scale the image data from [0, 1] to [-1, 1]
    if range_norm:
        tensor = tensor.mul(2.0).sub(1.0)
    # Convert torch.float32 image data type to torch.half image data type
    if half:
        tensor = tensor.half()
    return tensor

# 将BGR格式的NumPy数组转换为YCbCr格式的NumPy数组。如果only_use_y_channel为True，则仅使用Y通道；否则，使用Y、Cb、Cr三个通道。
def bgr2ycbcr(image: np.ndarray, only_use_y_channel: bool) -> np.ndarray:
    if only_use_y_channel:
        image = np.dot(image, [24.966, 128.553, 65.481]) + 16.0
    else:
        image = np.matmul(image, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786], [65.481, -37.797, 112.0]]) + [
            16, 128, 128]

    image /= 255.
    image = image.astype(np.float32)
    return image

# 将YCbCr格式的NumPy数组转换为BGR格式的NumPy数组。
def ycbcr2bgr(image: np.ndarray) -> np.ndarray:
    image_dtype = image.dtype
    image *= 255.
    image = np.matmul(image, [[0.00456621, 0.00456621, 0.00456621],
                              [0.00791071, -0.00153632, 0],
                              [0, -0.00318811, 0.00625893]]) * 255.0 + [-276.836, 135.576, -222.921]

    image /= 255.
    image = image.astype(image_dtype)
    return image

# 将PyTorch张量转换为NumPy数组。
# 如果range_norm为True，则将像素值从[-1,1]范围转换为[0,1]范围；
# 如果half为True，则将数据类型从torch.float32转换为torch.half。
def tensor2image(tensor: torch.Tensor, range_norm: bool, half: bool):
    if range_norm:
        tensor = tensor.add(1.0).div(2.0)
    if half:
        tensor = tensor.half()
    image = tensor.squeeze(0).permute(1, 2, 0).mul(255).clamp(0, 255).cpu().numpy().astype("uint8")
    return image

# 计算两张图像的峰值信噪比（PSNR）。该函数使用PyTorch张量进行计算。
def psnr(img1, img2):
    return 10. * torch.log10(1. / torch.mean((img1 - img2) ** 2))


def main() -> None:
    # 设置超参数和文件路径
    lr_dir = f"./data/Set5"  # 低分辨率图像路径
    hr_dir = f"./data/Set5"  # 高分辨率图像路径
    device = torch.device("cuda", 0)  # 使用GPU训练
    upscale_factor = 4  # 放大倍数
    model_path = './srcnn_model/srcnn_x4-T91-7c460643.pth.tar'  # 模型文件路径
    psnr_metrics_all = 0.0  # PSNR指标累计值
    ssim_metrics_all = 0.0  # SSIM指标累计值

    # 创建模型实例
    model = SRCNN().to(device=device, memory_format=torch.channels_last)

    # 加载预训练的模型
    checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint["state_dict"])

    # 创建输出结果的文件夹
    results_dir = os.path.join("srcnn_results")
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    # 将模型设置为评估模式，并使用半精度浮点数进行推理
    model.eval()
    model.half()

    # 初始化PSNR和SSIM指标
    psnr_metrics = 0.0
    ssim_metrics = 0.0

    # 对所有图像进行测试
    file_names = natsorted(os.listdir(lr_dir))
    total_files = len(file_names)
    for index in range(total_files):
        # 读取低分辨率图像、高分辨率图像和输出超分辨率图像的路径
        lr_image_path = os.path.join(lr_dir, file_names[index])
        sr_image_path = os.path.join(results_dir, f'super_resolution_{file_names[index]}')
        hr_image_path = os.path.join(hr_dir, file_names[index])

        # 打印当前处理的图像路径
        print(f"正在处理 `{os.path.abspath(lr_image_path)}`...")

        # 读取高分辨率图像并将像素值归一化到[0, 1]范围内
        hr_image = cv2.imread(hr_image_path, cv2.IMREAD_UNCHANGED).astype(np.float32) / 255.0

        # 读取低分辨率图像并进行双三次插值
        lr_img = Image.open(lr_image_path)
        size = np.min(lr_img.size)
        downscale = transforms.Resize(int(size / 4), interpolation=Image.BICUBIC)
        upscale = transforms.Resize(int(size), interpolation=Image.BICUBIC)
        lr_img = downscale(lr_img)
        lr_img = upscale(lr_img)

        # 将低分辨率图像转换为numpy数组，并将像素值缩放到[0,1]范围内
        lr_image = np.array(lr_img).astype(np.float32) / 255.0

        # 将RGB图像转换为BGR图像
        lr_image = cv2.cvtColor(lr_image, cv2.COLOR_RGB2BGR)

        # 保存高分辨率图像和低分辨率图像
        cv2.imwrite(os.path.join(results_dir, f'GroundTruth_{file_names[index]}'), hr_image * 255.0)
        cv2.imwrite(os.path.join(results_dir, f'subsample_{file_names[index]}'), lr_image * 255.0)

        # 将低分辨率BGR图像转换为YCbCr图像，并返回Y通道
        lr_y_image = bgr2ycbcr(lr_image, True)

        # 将高分辨率BGR图像转换为YCbCr图像，并返回Y通道
        hr_y_image = bgr2ycbcr(hr_image, True)

        # 将高分辨率BGR图像转换为YCbCr图像，并分离出Cb和Cr通道
        hr_ycbcr_image = bgr2ycbcr(hr_image, False)
        _, hr_cb_image, hr_cr_image = cv2.split(hr_ycbcr_image)

        # 将Y通道图像转换为PyTorch张量，并将其添加一个批次维度
        lr_y_tensor = image2tensor(lr_y_image, False, True).unsqueeze_(0)
        hr_y_tensor = image2tensor(hr_y_image, False, True).unsqueeze_(0)

        # 将张量移动到指定的设备，使用通道最后内存格式，并启用非阻塞式传输
        lr_y_tensor = lr_y_tensor.to(device=device, memory_format=torch.channels_last, non_blocking=True)
        hr_y_tensor = hr_y_tensor.to(device=device, memory_format=torch.channels_last, non_blocking=True)

        # 使用模型生成超分辨率图像，将像素值裁剪到[0,1]范围内
        with torch.no_grad():
            sr_y_tensor = model(lr_y_tensor).clamp_(0, 1.0)
        sr_y_image = tensor2image(sr_y_tensor, False, True)
        sr_y_image = sr_y_image.astype(np.float32) / 255.0

        # 将生成的Y通道图像与原始Cb和Cr通道合并为YCbCr图像
        sr_ycbcr_image = cv2.merge([sr_y_image, hr_cb_image, hr_cr_image])

        # 将YCbCr图像转换为BGR图像
        sr_image = ycbcr2bgr(sr_ycbcr_image)

        # 将原始Y通道图像转换为numpy数组，并将像素值缩放到[0,1]范围内
        hr_y_image = tensor2image(hr_y_tensor, False, True)
        hr_y_image = hr_y_image.astype(np.float32) / 255.0

        # 计算超分辨率图像的PSNR和SSIM指标
        psnr_metrics = psnr(sr_y_tensor, hr_y_tensor).item()
        print("sr_y_image", sr_y_image.shape)
        print("hr_y_image", hr_y_image.shape)
        ssim_metrics = structural_similarity(sr_y_image, hr_y_image, win_size=7, gaussian_weights=True,
                                             multichannel=True, data_range=1.0, K1=0.01, K2=0.03, sigma=1.5)
        # 将所有图像的PSNR和SSIM指标加起来，计算平均值
        psnr_metrics_all += psnr_metrics
        ssim_metrics_all += ssim_metrics
        # 将PSNR和SSIM指标以文字形式添加到SR图像上，并保存图像
        text = 'psnr:' + str(round(float(psnr_metrics), 3)) + ' ssim:' + str(ssim_metrics)
        cv2.putText(sr_image, text, (40, 50), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255), 1)
        # 打印每张图像的PSNR和SSIM指标
        print(file_names[index], f' psnr:{psnr_metrics}')
        print(file_names[index], f' ssim:{ssim_metrics}')
        cv2.imwrite(sr_image_path, sr_image * 255.0)
    avg_psnr = 100 if psnr_metrics_all / total_files > 100 else psnr_metrics_all / total_files
    avg_ssim = 1 if ssim_metrics_all / total_files > 1 else ssim_metrics_all / total_files
    print(f"PSNR: {avg_psnr:4.2f} [dB]\n"
          f"SSIM: {avg_ssim:4.4f} [u]")


if __name__ == "__main__":
    main()


正在处理 `E:\jupyter\three\data\Set5\baby.png`...




sr_y_image (512, 512, 1)
hr_y_image (512, 512, 1)


ValueError: win_size exceeds image extent. Either ensure that your images are at least 7x7; or pass win_size explicitly in the function call, with an odd value less than or equal to the smaller side of your images. If your images are multichannel (with color channels), set channel_axis to the axis number corresponding to the channels.