In [7]:
import os
import torch
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder

class TestDataset(Dataset):
    def __init__(self, img_dir, transform=None):
        self.img_dir = img_dir
        self.transform = transform
        self.img_names = os.listdir(img_dir)  # 获取所有图像文件名

    def __len__(self):
        return len(self.img_names)

    def __getitem__(self, idx):
        img_name = self.img_names[idx]
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('RGB')  # 确保图像是 3 通道
        if self.transform:
            image = self.transform(image)

class CustomDataset(Dataset):
    def __init__(self, img_dir, annotations_file, transform=None):
        self.img_dir = img_dir
        self.transform = transform
        self.annotations, self.label_to_idx = self._load_annotations(annotations_file)

    def _load_annotations(self, annotations_file):
        annotations = []
        label_to_idx = {}  # 标签映射
        current_idx = 0

        with open(annotations_file, 'r') as f:
            for line in f:
                parts = line.strip().split()
                img_name = parts[0]  # 图片文件名
                label = parts[1]  # 类别标签

                # 将字符串标签映射为整数
                if label not in label_to_idx:
                    label_to_idx[label] = current_idx
                    current_idx += 1
                label_idx = label_to_idx[label]

                annotations.append((img_name, label_idx))

        return annotations, label_to_idx

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        img_name, label = self.annotations[idx]
        img_path = os.path.join(self.img_dir, img_name)
        try:
            image = Image.open(img_path).convert('RGB')
            if self.transform:
                image = self.transform(image)
            return image, label
        except Exception as e:
            print(f"Error loading image {img_path}: {e}")
            return self.__getitem__((idx + 1) % len(self))

In [6]:
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import transforms
import torch

temp_transform = transforms.Compose([
    transforms.Resize(256),  # 确保所有图像统一尺寸
    transforms.CenterCrop(224),
    transforms.ToTensor()  # 关键：将PIL图像转为Tensor
])

train_dataset = ImageFolder(
    root='imagenet_mini/imagenet-mini/train/',
    transform=temp_transform  # 添加transform参数
)

def calculate_mean_std(dataset):
    loader = DataLoader(
        dataset,
        batch_size=64,
        num_workers=4,
        shuffle=False  # 必须关闭shuffle以保证统计正确性
    )
    
    mean = 0.0
    std = 0.0
    total_images = 0
    
    with torch.no_grad():
        for images, _ in loader:
            # 当前批次图像形状: [B, C, H, W]
            batch_samples = images.size(0)
            images = images.view(batch_samples, images.size(1), -1)
            
            mean += images.mean(2).sum(0)  # 按通道计算均值
            std += images.std(2).sum(0)    # 按通道计算标准差
            total_images += batch_samples
    
    # 全局平均
    mean /= total_images
    std /= total_images
    return mean, std

mean, std = calculate_mean_std(train_dataset)
print(f"计算得到的均值: {mean.tolist()}")
print(f"计算得到的标准差: {std.tolist()}")

计算得到的均值: [0.48936358094215393, 0.45920294523239136, 0.4111420214176178]
计算得到的标准差: [0.22562411427497864, 0.22159235179424286, 0.22100557386875153]


In [8]:
# 数据预处理
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),  # 随机裁剪到224x224
    transforms.RandomHorizontalFlip(),  # 随机水平翻转
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),  # 颜色抖动
    transforms.RandomApply([transforms.GaussianBlur(3)], p=0.5),  # 随机模糊
    transforms.ToTensor(),  # 转为Tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 归一化
])

test_transform = transforms.Compose([
    transforms.Resize(256),  # 缩放到256x256
    transforms.CenterCrop(224),  # 中心裁剪到224x224
    transforms.ToTensor(),  # 转为Tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 归一化
])

# 加载数据集
train_dataset = ImageFolder(root='imagenet_mini/imagenet-mini/train/', transform=train_transform)
test_dataset = ImageFolder(root='imagenet_mini/imagenet-mini/val/', transform=test_transform)

In [None]:
# create DataLoader
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4, pin_memory=True)
# val_loader = DataLoader(val_dataset, batch_size=256, shuffle=False, num_workers=0, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=0, pin_memory=True)

In [9]:
import torch.nn as nn
import torch.nn.functional as F

class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)
        return out

class ResNet18(nn.Module):
    def __init__(self, num_classes=500):
        super(ResNet18, self).__init__()
        self.in_channels = 224

        # 初始卷积层
        self.conv1 = nn.Conv2d(3, 224, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(224)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # 4 个阶段（stage）
        self.layer1 = self._make_layer(224, 256, 2)
        self.layer2 = self._make_layer(256, 512, 2, stride=2)
        # self.layer3 = self._make_layer(128, 256, 2, stride=2)
        # self.layer4 = self._make_layer(128, 256, 2, stride=2)

        # 全局平均池化和全连接层
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Sequential(
            nn.Linear(256, num_classes),
            nn.Dropout(0.5)  # 添加 Dropout
        )

    def _make_layer(self, in_channels, out_channels, blocks, stride=1):
        downsample = None
        if stride != 1 or in_channels != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

        layers = []
        layers.append(ResidualBlock(in_channels, out_channels, stride, downsample))
        for _ in range(1, blocks):
            layers.append(ResidualBlock(out_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        # x = self.layer3(x)
        # x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

In [10]:
def train(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(train_loader)

In [11]:
def evaluate(model, data_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
    return running_loss / len(data_loader)

In [12]:
from torch.optim.lr_scheduler import _LRScheduler
import math

class WarmupCosineDecayLR(_LRScheduler):
    def __init__(self, optimizer, warmup_epochs, total_epochs, base_lr, final_lr, last_epoch=-1):
        self.warmup_epochs = warmup_epochs
        self.total_epochs = total_epochs
        self.base_lr = base_lr
        self.final_lr = final_lr
        super(WarmupCosineDecayLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        if self.last_epoch < self.warmup_epochs:
            # 预热阶段：线性增加学习率
            return [self.base_lr * (self.last_epoch + 1) / self.warmup_epochs for _ in self.optimizer.param_groups]
        else:
            # 余弦衰减阶段
            progress = (self.last_epoch - self.warmup_epochs) / (self.total_epochs - self.warmup_epochs)
            cosine_decay = 0.5 * (1 + math.cos(math.pi * progress))
            return [self.final_lr + (self.base_lr - self.final_lr) * cosine_decay for _ in self.optimizer.param_groups]

In [13]:
from torchsummary import summary

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ResNet18(num_classes=500).to(device)

summary(model, input_size=(3, 224,224))

RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x512 and 256x500)

In [7]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)

# 定义学习率调度器
warmup_epochs = 5  # 预热 5 个 epoch
total_epochs = 50  # 总训练 epoch 数
base_lr = 0.001    # 初始学习率
final_lr = 0.00001 # 最终学习率
scheduler = WarmupCosineDecayLR(optimizer, warmup_epochs, total_epochs, base_lr, final_lr)

train_losses = []  # 记录每个 epoch 的训练损失
val_losses = []   # 记录每个 epoch 的测试损失

for epoch in range(total_epochs):
    # train
    train_loss = train(model, train_loader, criterion, optimizer, device)
    train_losses.append(train_loss)

    # validation
    val_loss = evaluate(model, val_loader, criterion, device)
    val_losses.append(val_loss)
    
    scheduler.step()
    current_lr = optimizer.param_groups[0]['lr']
    print(f'Epoch {epoch+1}/{total_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f},  LR: {current_lr:.6f},')

Epoch 1/50, Train Loss: 5.4512, Val Loss: 5.6307,  LR: 0.000400,
Epoch 2/50, Train Loss: 5.1345, Val Loss: 5.7911,  LR: 0.000600,
Epoch 3/50, Train Loss: 4.9921, Val Loss: 5.8672,  LR: 0.000800,
Epoch 4/50, Train Loss: 4.8954, Val Loss: 5.9613,  LR: 0.001000,
Epoch 5/50, Train Loss: 4.8086, Val Loss: 6.0273,  LR: 0.001000,
Epoch 6/50, Train Loss: 4.7139, Val Loss: 6.2395,  LR: 0.000999,


KeyboardInterrupt: 