## 模型定义（ResNet + VGG）

In [1]:
import os
import cv2
import random
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score


In [2]:
def initialize_model(model_name, num_classes, use_pretrained=True):
    model = None
    input_size = 224  # 标准输入尺寸

    if model_name == "resnet":
        # ResNet-18
        model = models.resnet18(pretrained=use_pretrained)
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, num_classes)
    elif model_name == "vgg":
        # VGG16
        model = models.vgg16(pretrained=use_pretrained)
        num_ftrs = model.classifier[6].in_features
        model.classifier[6] = nn.Linear(num_ftrs, num_classes)
    else:
        raise ValueError("不支持的模型名称，请选择 'resnet' 或 'vgg'")

    return model, input_size

## 自定义数据集类

In [3]:
class CustomDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
        
        # OpenCV读取的是BGR，需转为RGB
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform:
            image = self.transform(image)

        return image, label

## 数据预处理 + 数据增强

In [4]:
def get_transforms(input_size=224):
    train_transform = transforms.Compose([
        transforms.ToPILImage(),  # 转为PIL图像
        transforms.RandomResizedCrop(input_size),  # 随机缩放裁剪
        transforms.RandomHorizontalFlip(),  # 随机水平翻转
        transforms.RandomRotation(15),  # 随机旋转
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

    test_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

    return train_transform, test_transform

## 主程序入口（加载数据、创建模型和训练）

In [5]:

# 重写数据划分逻辑，确保 80% 训练 20% 测试，共 12000 张图片
from sklearn.model_selection import train_test_split
import random
import os
from pathlib import Path

base_path = Path("Aerial_Landscapes")
categories = sorted(os.listdir(base_path))
categories = [cls for cls in categories if not cls.startswith(".")]  # 排除隐藏文件夹
class_to_idx = {cls: idx for idx, cls in enumerate(categories)}

image_paths, labels = [], []
for cls in categories:
    cls_folder = base_path / cls
    paths = list(cls_folder.glob("*.jpg"))
    image_paths += paths
    labels += [class_to_idx[cls]] * len(paths)

combined = list(zip(image_paths, labels))
random.shuffle(combined)
image_paths[:], labels[:] = zip(*combined)

train_paths, test_paths, train_labels, test_labels = train_test_split(
    image_paths, labels, test_size=0.2, stratify=labels, random_state=42
)

print(f"训练图像数量: {len(train_paths)}，测试图像数量: {len(test_paths)}")


训练图像数量: 9600，测试图像数量: 2400


In [6]:

import torchvision.models as models

# 定义 ResNet18 模型（若有修改，请根据需要更改）
model = models.resnet18(pretrained=False)
model.fc = nn.Linear(model.fc.in_features, num_classes)  # 注意：需要预定义 num_classes




NameError: name 'num_classes' is not defined

In [7]:

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import seaborn as sns
import matplotlib.pyplot as plt

# 加载最优模型
model.load_state_dict(torch.load("checkpoint.pth"))
model.eval()

# 收集所有预测结果
all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.numpy())

# 输出评估指标
accuracy = accuracy_score(all_labels, all_preds)
precision = precision_score(all_labels, all_preds, average='macro', zero_division=0)
recall = recall_score(all_labels, all_preds, average='macro', zero_division=0)
f1 = f1_score(all_labels, all_preds, average='macro', zero_division=0)

print(f"准确率 (Accuracy): {accuracy:.4f}")
print(f"精确率 (Precision): {precision:.4f}")
print(f"召回率 (Recall): {recall:.4f}")
print(f"F1 分数 (F1-score): {f1:.4f}")
print("\n分类报告:\n", classification_report(all_labels, all_preds, target_names=classes))

# 混淆矩阵可视化
cm = confusion_matrix(all_labels, all_preds)
plt.figure(figsize=(12, 10))
sns.heatmap(cm, annot=True, fmt="d", xticklabels=classes, yticklabels=classes, cmap="Blues")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()


  model.load_state_dict(torch.load("checkpoint.pth"))


FileNotFoundError: [Errno 2] No such file or directory: 'checkpoint.pth'