手动实现一个 Adam 优化器，并与 PyTorch 官方实现进行对比，通过训练一个简单的线性回归模型来观察结果差异，以此深化对 Adam 优化器的理解。

In [None]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt


# Hand-written Adam optimizer, kept deliberately small so it can be read
# side by side with torch.optim.Adam.
class ManualAdam:
    """Minimal Adam optimizer over a fixed list of tensors.

    State kept on the instance:

    * ``params`` - the tensors being optimised
    * ``ms`` / ``vs`` - first / second moment estimates, one per parameter
    * ``steps`` - number of updates applied to each parameter
    * ``t`` - number of times :meth:`step` has been called
    """

    def __init__(self, params, lr=0.001, betas=(0.9, 0.999), eps=1e-8):
        """
        Store the hyper-parameters and create zero-filled moment buffers.

        :param params: iterable of tensors to optimise (materialised into a list)
        :param lr: learning rate
        :param betas: decay rates of the first and second moment estimates
        :param eps: small constant added to the denominator to avoid division by zero
        """
        self.params = list(params)
        self.lr = lr
        self.beta1, self.beta2 = betas
        self.eps = eps
        # Number of step() calls made so far.
        self.t = 0
        # Per-parameter update counts. Bias correction must use the number of
        # updates a parameter has actually received, not the global call count,
        # otherwise a parameter that is skipped (grad is None) for a few steps
        # is under-corrected on its first real update.
        self.steps = [0] * len(self.params)
        # First moment estimates
        self.ms = [torch.zeros_like(p) for p in self.params]
        # Second moment estimates
        self.vs = [torch.zeros_like(p) for p in self.params]

    def step(self):
        """
        Apply one Adam update to every parameter that currently has a gradient.

        Parameters whose ``grad`` is ``None`` are skipped and their moment
        buffers and step counts are left untouched.
        """
        self.t += 1
        # The update itself must not be recorded by autograd.
        with torch.no_grad():
            for i, p in enumerate(self.params):
                grad = p.grad
                if grad is None:
                    continue
                self.steps[i] += 1
                t = self.steps[i]
                # Exponential moving average of the gradient
                self.ms[i] = self.beta1 * self.ms[i] + (1 - self.beta1) * grad
                # Exponential moving average of the squared gradient
                self.vs[i] = self.beta2 * self.vs[i] + (1 - self.beta2) * (grad ** 2)
                # Bias correction: both averages start at zero and are
                # therefore biased towards zero during the first updates.
                m_hat = self.ms[i] / (1 - self.beta1 ** t)
                v_hat = self.vs[i] / (1 - self.beta2 ** t)
                # In-place parameter update
                p.sub_(self.lr * m_hat / (torch.sqrt(v_hat) + self.eps))

    def zero_grad(self):
        """
        Reset the gradient of every parameter to zero.

        Gradients are detached and zeroed in place; parameters without a
        gradient are left alone.
        """
        for p in self.params:
            if p.grad is not None:
                p.grad.detach_()
                p.grad.zero_()


# A simple linear regression model
class LinearRegression(nn.Module):
    """Linear regression implemented as a single ``nn.Linear`` layer.

    With the default arguments the model maps one input feature to one
    output value.
    """

    def __init__(self, in_features=1, out_features=1):
        """
        Create the underlying linear layer.

        :param in_features: number of input features per sample
        :param out_features: number of predicted values per sample
        """
        super().__init__()
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        """
        Run the linear layer on ``x``.

        :param x: input tensor whose last dimension equals ``in_features``
        :return: the output of the linear layer
        """
        return self.linear(x)


# Build a synthetic dataset: y = 2x + 1 plus a little Gaussian noise.
# The NumPy seed is fixed so the data is the same on every run.
np.random.seed(42)
x_np = np.random.randn(100, 1)      # first draw: inputs
noise_np = np.random.randn(100, 1)  # second draw: noise
x = torch.tensor(x_np, dtype=torch.float32)
y = 2 * x + 1 + 0.1 * torch.tensor(noise_np, dtype=torch.float32)

# Create both models up front and copy the manual model's initial weights
# into the PyTorch model. nn.Linear initialises its weights randomly, so
# without this the two loss curves would differ because of the starting
# point rather than because of the optimizer.
model_manual = LinearRegression()
model_pytorch = LinearRegression()
model_pytorch.load_state_dict(model_manual.state_dict())
criterion = nn.MSELoss()

# Train with the hand-written Adam optimizer
optimizer_manual = ManualAdam(model_manual.parameters())

manual_losses = []
for epoch in range(200):
    optimizer_manual.zero_grad()
    outputs = model_manual(x)
    loss = criterion(outputs, y)
    # Record the loss measured before this epoch's update
    manual_losses.append(loss.item())
    loss.backward()
    optimizer_manual.step()

# Train with PyTorch's built-in Adam optimizer (same default hyper-parameters)
optimizer_pytorch = torch.optim.Adam(model_pytorch.parameters())

pytorch_losses = []
for epoch in range(200):
    optimizer_pytorch.zero_grad()
    outputs = model_pytorch(x)
    loss = criterion(outputs, y)
    # Record the loss measured before this epoch's update
    pytorch_losses.append(loss.item())
    loss.backward()
    optimizer_pytorch.step()

# Plot both loss curves on the same axes for comparison
loss_curves = (
    (manual_losses, 'Manual Adam'),
    (pytorch_losses, 'PyTorch Adam'),
)
for curve, curve_label in loss_curves:
    plt.plot(curve, label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss Comparison between Manual Adam and PyTorch Adam')
plt.legend()
plt.show()