In [21]:
import torch
import torch.nn as nn

# 定义一个简单的神经网络模型
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        # 定义模型的层次
        self.fc1 = nn.Linear(28 * 28, 128)  # 第一层，全连接层
        self.fc2 = nn.Linear(128, 10)  # 第二层，输出层，10个类别
    
    def forward(self, x):
        # 前向传播定义
        x = torch.flatten(x, 1)  # 展平输入图片 (batch_size, 28, 28) -> (batch_size, 28*28)
        x = torch.relu(self.fc1(x))  # 通过第一层并应用ReLU激活函数
        x = self.fc2(x)  # 通过第二层得到输出
        return x

In [23]:

# 创建模型实例
model = SimpleNN()

# 打印模型结构
print("Model structure:")
print(model)

# 查看模型参数
print("\nModel parameters:")
for param in model.parameters():
    print(param.shape)
for name, param in model.named_parameters():
    print(f"{name}: {param.shape}")


Model structure:
SimpleNN(
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

Model parameters:
torch.Size([128, 784])
torch.Size([128])
torch.Size([10, 128])
torch.Size([10])
fc1.weight: torch.Size([128, 784])
fc1.bias: torch.Size([128])
fc2.weight: torch.Size([10, 128])
fc2.bias: torch.Size([10])


In [None]:

# 选择设备: GPU/CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# 损失函数和优化器
criterion = nn.CrossEntropyLoss()
# 假设训练数据
inputs = torch.randn(32, 28, 28).to(device)  # Batch size: 32, 每个输入为28x28的图片
labels = torch.randint(0, 10, (32,)).to(device)  # 32个样本的标签，范围在0到9之间

# 训练模式
model.train()
outputs = model(inputs)  # 前向传播
loss = criterion(outputs, labels)  # 计算损失
loss.backward()  # 反向传播

# 在评估模式下，不会计算梯度
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()  # 这里不会更新参数，因为没有梯度计算
print("\nEvaluation outputs:", outputs)
for param in model.parameters():
    print("Parameter grad:", param.grad)
    
    
    
# model.eval()
# for param in model.parameters():
#     print("Parameter requires_grad:", param.requires_grad)
with torch.no_grad():
    for param in model.parameters():
        print("Parameter requires_grad:", param.requires_grad)
        


Evaluation outputs: tensor([[-7.3347e-02, -1.1473e-01,  3.2109e-01,  2.4325e-01, -1.7589e-01,
         -1.3836e-01, -5.6278e-02,  7.3760e-02,  1.0805e-01,  1.1136e-01],
        [-1.7091e-01, -9.7208e-02,  4.7650e-01, -8.6144e-02, -2.3651e-02,
         -2.5933e-01,  2.8517e-01,  2.7950e-01,  2.5820e-01, -1.3152e-01],
        [ 2.6194e-02, -3.1539e-01,  1.7690e-01, -3.2742e-01, -2.3170e-01,
          1.8872e-01,  7.2235e-02, -1.9280e-01, -3.2548e-03, -1.6300e-01],
        [-1.6516e-01, -3.7891e-01,  3.2005e-01,  1.8289e-01, -2.8213e-01,
         -7.8919e-02,  2.3430e-01, -2.2160e-01,  2.3786e-01,  1.5226e-01],
        [-4.1077e-01, -3.5415e-01,  1.7021e-01,  1.6924e-01, -1.1865e-01,
         -2.6454e-01,  2.2788e-01, -1.1992e-01, -5.7016e-02, -1.5884e-01],
        [-1.6895e-01, -1.8471e-02,  4.4429e-01,  1.6341e-01, -7.5507e-02,
         -7.2241e-02,  7.4753e-02, -6.4325e-02,  3.1251e-01,  9.8277e-02],
        [-1.9612e-01, -2.1791e-01,  1.9810e-01,  1.4333e-01, -2.0286e-01,
          1

In [None]:
"""
compare:
strict=False vs strict=True
"""
# 保存模型
torch.save(model.state_dict(), 'simple_nn.pth')
# 加载模型
model_loaded = SimpleNN()
model_loaded.test = torch.nn.Linear(28 * 28, 10)  # 确保加载的模型有相同的结构

missing,unexpected =model_loaded.load_state_dict(torch.load('simple_nn.pth'), strict=False)



Missing keys: ['test.weight', 'test.bias']
Unexpected keys: []


In [None]:

"""
Compare:
- .eval()
- with torch.no_grad()
- .requires_grad_(False)
"""
model.eval()  # 评估模式
for param in model.parameters():
    print(param.requires_grad)
    
model.requires_grad_(False)  # 禁用梯度计算
for param in model.parameters():
    print(param.requires_grad)
    
model.requires_grad_(True)  # 恢复梯度计算
    
inputs = torch.randn(32, 28, 28).to(device,dtype = torch.float32)  # Batch size: 32, 每个输入为28x28的图片
outputs = model(inputs)  # 前向传播
print(outputs.grad)
loss = torch.nn.MSELoss()(outputs,outputs*0)
loss.backward()  # 反向传播
for param in model.parameters():
    print(param.requires_grad)
    

    


False
False
False
False
False
False
False
False
None
True
True
True
True


  print(outputs.grad)


'\nCompare:\n'

In [16]:

# 使用apply()来应用初始化
def init_weights(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_normal_(m.weight)

model.apply(init_weights)  # 应用权重初始化方法


SimpleNN(
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)