## task03 尝试不同optimizer对模型进行训练，观察对比loss结果。

In [None]:
#导入包
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Load the Olivetti faces dataset
faces = fetch_olivetti_faces()
# X: flattened images [400, 4096]; y: class labels [400]
X, y = faces.data, faces.target

# Hold out 20% of the samples as a test set. Stratifying on y keeps the class
# distribution of both splits consistent with the full dataset; the fixed
# random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=0
)

# Convert the NumPy arrays to tensors: float features, long (int64) labels
X_train_tensor, X_test_tensor = (
    torch.from_numpy(features).float() for features in (X_train, X_test)
)
y_train_tensor, y_test_tensor = (
    torch.from_numpy(labels).long() for labels in (y_train, y_test)
)

# Hyperparameters
LR = 1e-3
epochs = 20
BATCH_SIZE = 128

# TensorDataset pairs each feature row with its label so that a DataLoader
# can serve them together in mini-batches.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Only the training batches are shuffled. Evaluation gains nothing from a
# random order, and shuffling it would make the test batches differ between
# runs and consume global RNG state for no benefit.
train_dl = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dl = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Model dimensions
input_size = 4096    # features per flattened image
hidden_size = 2048   # width of the first hidden layer
num_classes = 40     # number of output classes


# Network architecture
class Model(nn.Module):
    """Fully connected classifier: two hidden blocks and a linear output layer.

    Args:
        input_size: number of input features per sample.
        hidden_size: width of the first hidden layer; the second hidden
            layer is half as wide (``hidden_size // 2``).
        num_classes: number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        half_hidden = hidden_size // 2

        # Hidden block 1: Linear -> BatchNorm -> ReLU -> Dropout(0.5)
        self.f1 = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.BatchNorm1d(hidden_size),
            nn.ReLU(),
            nn.Dropout(0.5),
        )
        # Hidden block 2: Linear -> BatchNorm -> ReLU (no dropout here)
        self.f2 = nn.Sequential(
            nn.Linear(hidden_size, half_hidden),
            nn.BatchNorm1d(half_hidden),
            nn.ReLU(),
        )
        # Output layer producing one raw logit per class
        self.out = nn.Linear(half_hidden, num_classes)

    def forward(self, x):
        """Run ``x`` through both hidden blocks and return the class logits."""
        return self.out(self.f2(self.f1(x)))


# Loss function shared by every training run. The optimizers themselves are
# created further down, where the experiment is set up.
loss_fn = nn.CrossEntropyLoss()

# Training loop
def train_iter(epochs, model, optimizer, train_dl, criterion=None):
    """Train ``model`` for ``epochs`` epochs and return the per-epoch loss.

    Args:
        epochs: number of passes over ``train_dl``.
        model: the ``nn.Module`` to train (switched to train mode each epoch).
        optimizer: optimizer already bound to ``model``'s parameters.
        train_dl: iterable yielding ``(data, target)`` mini-batches.
        criterion: loss callable ``criterion(outputs, target)``. Defaults to
            the module-level ``loss_fn``. It is assumed to return the mean
            loss over the batch (the default reduction of
            ``nn.CrossEntropyLoss``).

    Returns:
        A list with one float per epoch: the sample-weighted mean training
        loss of that epoch, or NaN if ``train_dl`` produced no samples.
    """
    if criterion is None:
        criterion = loss_fn

    train_hist = []
    for epoch in range(epochs):
        model.train()
        loss_sum = 0.0
        sample_count = 0
        for data, target in train_dl:
            optimizer.zero_grad()
            loss = criterion(model(data), target)
            loss.backward()
            optimizer.step()

            # Weight each batch by its size so that a smaller final batch
            # does not distort the epoch average.
            batch_size = target.size(0)
            loss_sum += loss.item() * batch_size
            sample_count += batch_size

        # Report the epoch mean rather than the last batch's loss: a single
        # mini-batch is too noisy to compare optimizers with.
        epoch_loss = loss_sum / sample_count if sample_count else float('nan')
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}')
        train_hist.append(epoch_loss)
    return train_hist
# Plotting
def draw_train_hist(hist_list):
    """Plot every loss history in ``hist_list`` on one figure and show it.

    Curves are labelled ``optimizer1``, ``optimizer2``, ... in list order.
    """
    for number, losses in enumerate(hist_list, start=1):
        plt.plot(losses, label=f'optimizer{number}')
    plt.title('Training Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()
# 测试函数
def test_acc(test_dl, model):
    correct = 0
    total = 0
    with torch.no_grad():  # 不计算梯度
        for data, target in test_dl:
            output = model(data)
            _, predicted = torch.max(output, 1)  # 返回每行最大值和索引
            total += target.size(0)  # size(0) 等效 shape[0]
            correct += (predicted == target).sum().item()
    return correct / total * 100

# Optimizers to compare, in the order their curves are plotted
# (optimizer1, optimizer2, optimizer3).
optimizer_classes = [optim.Adam, optim.SGD, optim.RMSprop]

# Train one fresh model per optimizer. Reusing a single model would let each
# later optimizer continue from weights already trained by the previous one,
# which makes the loss curves incomparable.
train_hists = []
for optimizer_cls in optimizer_classes:
    # Re-seed so every run starts from identical initial weights and draws
    # from the same random stream.
    torch.manual_seed(0)
    face_model = Model(input_size, hidden_size, num_classes)
    optimizer = optimizer_cls(face_model.parameters(), lr=LR)

    print(f'--- {optimizer_cls.__name__} ---')
    train_hists.append(train_iter(epochs, face_model, optimizer, train_dl))

    # Evaluate in eval mode so Dropout is disabled and BatchNorm uses its
    # running statistics instead of per-batch statistics.
    face_model.eval()
    accuracy = test_acc(test_dl, face_model)
    print(f'Test Accuracy: {accuracy:.2f}%')

# Plot all loss histories together
draw_train_hist(train_hists)