In [1]:
from datasets import load_dataset
from torchvision.transforms import Compose, RandomResizedCrop, RandomHorizontalFlip, Resize, CenterCrop, ToTensor, Normalize

dataset = load_dataset("imagefolder", data_dir="D:/Study/imagenet_mini/imagenet-mini/")
train_ds = dataset["train"]
val_ds = dataset["test"]

Resolving data files:   0%|          | 0/34745 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/3923 [00:00<?, ?it/s]

In [2]:
from transformers import AutoImageProcessor
from torchvision.transforms import ColorJitter, RandomRotation, GaussianBlur
from torch.utils.data import DataLoader

processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50", cache_dir = "D:/Study/microsoft/")
size = processor.size["shortest_edge"]

normalize = Normalize(mean=processor.image_mean, std=processor.image_std)

train_transforms = Compose([
    RandomResizedCrop(size, scale=(0.6, 1.0)),  # 扩大随机裁剪范围
    RandomHorizontalFlip(),
    ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),  # 颜色抖动
    RandomRotation(15),          # 随机旋转 ±15度
    GaussianBlur(kernel_size=3), # 轻微模糊
    ToTensor(),
    normalize,
])

val_transforms = Compose([
    Resize(256),
    CenterCrop(size),
    ToTensor(),
    normalize,
])

# 应用预处理函数
def preprocess_train(examples):
    examples['pixel_values'] = [train_transforms(img.convert('RGB')) for img in examples['image']]
    return examples

def preprocess_val(examples):
    examples['pixel_values'] = [val_transforms(img.convert('RGB')) for img in examples['image']]
    return examples

# 修改数据集加载方式（动态处理，避免内存缓存）
train_dataset = train_ds.with_transform(preprocess_train)  # 替换 map
val_dataset = val_ds.with_transform(preprocess_val)

# 调整 DataLoader 配置（启用多进程加速）
batch_size = 256  # 增大批量大小以利用 GPU 并行计算
num_workers = 0   # 根据 CPU 核心数调整

def collate_fn(batch):
    pixel_values = torch.stack([item["pixel_values"] for item in batch])
    labels = torch.tensor([item["label"] for item in batch])
    return {"pixel_values": pixel_values, "label": labels}

train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    collate_fn=collate_fn,
    pin_memory=True,
)

val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    collate_fn=collate_fn,
    pin_memory=True,
)

# train_dataset.with_format("torch", columns=["pixel_values", "label"])
# val_dataset.with_format("torch", columns=["pixel_values", "label"])

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [3]:
from transformers import ResNetForImageClassification

# 加载模型
model = ResNetForImageClassification.from_pretrained(
    "microsoft/resnet-18",
    cache_dir = "D:/Study/microsoft/",
    num_labels=1000,
    ignore_mismatched_sizes=True
)

# 冻结所有层（默认情况下参数是requires_grad=True）
for param in model.parameters():
    param.requires_grad = False

# # 仅解冻分类头（最后一层）
for param in model.classifier.parameters():
    param.requires_grad = True

# 或者解冻最后几个阶段（例如stage3）
# for name, param in model.named_parameters():
#     if "stages.3" in name:  # 根据ResNet结构选择解冻的层
#         param.requires_grad = True

In [4]:
import torch
from torch.cuda.amp import autocast, GradScaler
from torch import nn
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
import time

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

epochs = 10  # 根据需求调整
best_val_acc = 0.0
total_time = 0.0

# 交叉熵损失函数
loss_fn = nn.CrossEntropyLoss()

# 筛选需要训练的参数
trainable_params = [p for p in model.parameters() if p.requires_grad]

# 使用AdamW优化器，学习率根据任务调整
optimizer = AdamW(trainable_params, lr=5e-4, weight_decay=1e-4)
scheduler = CosineAnnealingLR(optimizer, T_max=epochs, eta_min=1e-6)

scaler = GradScaler()  # 在训练循环外初始化

# 修改训练循环中的前向和反向传播
for batch in train_loader:
    inputs = batch["pixel_values"].to(device)
    labels = batch["label"].to(device)
    
    optimizer.zero_grad()
    
    # 启用混合精度
    with autocast():
        outputs = model(inputs)
        loss = loss_fn(outputs.logits, labels)
    
    # 缩放梯度并反向传播
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()

for epoch in range(epochs):
    start_time = time.time()
    # 训练阶段
    model.train()
    train_loss = 0.0
    for batch in train_loader:
        inputs = batch["pixel_values"].to(device)
        labels = batch["label"].to(device)
        
        optimizer.zero_grad()
        # 启用混合精度
        with autocast():
            outputs = model(inputs)
            loss = loss_fn(outputs.logits, labels)
        
        # 缩放梯度并反向传播
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        train_loss += loss.item()
    
    # 验证阶段
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in val_loader:
            inputs = batch["pixel_values"].to(device)
            labels = batch["label"].to(device)
            
            outputs = model(inputs)
            loss = loss_fn(outputs.logits, labels)
            val_loss += loss.item()
            
            _, predicted = torch.max(outputs.logits, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    epoch_time = time.time() - start_time
    total_time += epoch_time
    
    # 计算指标
    avg_train_loss = train_loss / len(train_loader)
    avg_val_loss = val_loss / len(val_loader)
    val_acc = correct / total
    
    print(
        f'Epoch {epoch+1}/{epochs}, '
        f'Train Loss: {avg_train_loss:.4f}, '
        f'Val Loss: {avg_val_loss:.4f}, '
        f'Val Acc: {val_acc:.4f}, '
        f'Epoch Time: {epoch_time:.2f}s, '
        f'Total Time: {total_time:.2f}s'
    )
    # 保存最佳模型
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "resnet-18_classfication_imagenet_mini.pth")
        print("Saved best model!")



Epoch 1/10, Train Loss: 1.5071, Val Loss: 1.4569, Val Acc: 0.6449, Epoch Time: 753.68s, Total Time: 753.68s
Saved best model!
Epoch 2/10, Train Loss: 1.4233, Val Loss: 1.4542, Val Acc: 0.6492, Epoch Time: 790.46s, Total Time: 1544.15s
Saved best model!
Epoch 3/10, Train Loss: 1.3577, Val Loss: 1.4659, Val Acc: 0.6464, Epoch Time: 795.17s, Total Time: 2339.32s
Epoch 4/10, Train Loss: 1.3165, Val Loss: 1.4668, Val Acc: 0.6459, Epoch Time: 799.89s, Total Time: 3139.21s
Epoch 5/10, Train Loss: 1.2576, Val Loss: 1.4829, Val Acc: 0.6385, Epoch Time: 757.98s, Total Time: 3897.19s
Epoch 6/10, Train Loss: 1.2069, Val Loss: 1.4982, Val Acc: 0.6408, Epoch Time: 838.91s, Total Time: 4736.10s
Epoch 7/10, Train Loss: 1.1719, Val Loss: 1.5085, Val Acc: 0.6347, Epoch Time: 836.07s, Total Time: 5572.16s
Epoch 8/10, Train Loss: 1.1348, Val Loss: 1.5102, Val Acc: 0.6332, Epoch Time: 846.89s, Total Time: 6419.05s
Epoch 9/10, Train Loss: 1.0998, Val Loss: 1.5246, Val Acc: 0.6337, Epoch Time: 842.30s, Total