In [2]:
import torch
print("PyTorch版本:", torch.__version__)

# Sanity check: run the built-in softmax over a single row of scores.
X = torch.tensor([[1.0, 2.0, 3.0]])
print("输入:", X)
# Normalise across the class axis (dim 1) using the tensor-method form.
print("Softmax输出:", X.softmax(dim=1))

PyTorch版本: 2.8.0+cpu
输入: tensor([[1., 2., 3.]])
Softmax输出: tensor([[0.0900, 0.2447, 0.6652]])


In [3]:
import torch
import torch.nn as nn

print("=== 1. Softmax函数 ===")

def softmax(X, dim=1):
    """Numerically stable softmax along one axis of a tensor.

    Args:
        X: Tensor of scores (logits).
        dim: Axis to normalise over. Defaults to 1, i.e. each row of a
            2-D ``(samples, classes)`` tensor is normalised independently.

    Returns:
        Tensor with the same shape as ``X`` whose entries along ``dim``
        are non-negative and sum to 1.
    """
    # A zero-length axis has nothing to normalise (and no max to take);
    # exp of the empty tensor gives the same empty result the naive formula would.
    if X.size(dim) == 0:
        return torch.exp(X)
    # Subtract the per-slice maximum before exponentiating. Softmax is
    # invariant to this shift, but without it exp() overflows to inf for
    # large scores and the division yields NaN.
    shifted = X - X.max(dim=dim, keepdim=True).values
    X_exp = torch.exp(shifted)
    partition = X_exp.sum(dim, keepdim=True)
    return X_exp / partition

# Demo: one row with distinct scores and one row with identical scores.
X = torch.tensor([[1.0, 2.0, 3.0], [1.0, 1.0, 1.0]])
print("输入:\n", X)
# Evaluate once and reuse the result for both printouts.
probs = softmax(X)
print("Softmax输出:\n", probs)
print("每行和:", probs.sum(1))

=== 1. Softmax函数 ===
输入:
 tensor([[1., 2., 3.],
        [1., 1., 1.]])
Softmax输出:
 tensor([[0.0900, 0.2447, 0.6652],
        [0.3333, 0.3333, 0.3333]])
每行和: tensor([1., 1.])


In [4]:
print("\n=== 2. 交叉熵损失 ===")

# Predicted class probabilities, one row per sample.
y_hat = torch.tensor([
    [0.1, 0.3, 0.6],
    [0.3, 0.2, 0.5],
])
# Index of the true class for each sample.
y = torch.tensor([2, 0])

print("预测概率:\n", y_hat)
print("真实标签:", y)

# Cross-entropy by hand: pick each row's probability at its true label,
# then take the negative log of that probability.
loss = -torch.log(y_hat.gather(1, y.view(-1, 1)).squeeze(1))
print("每个样本的损失:", loss)
print("平均损失:", loss.mean().item())


=== 2. 交叉熵损失 ===
预测概率:
 tensor([[0.1000, 0.3000, 0.6000],
        [0.3000, 0.2000, 0.5000]])
真实标签: tensor([2, 0])
每个样本的损失: tensor([0.5108, 1.2040])
平均损失: 0.8573992252349854


In [5]:
print("\n=== 3. 模型定义 ===")

# Softmax regression: flatten each 28x28 image to 784 features, then a
# single linear layer producing 10 class scores (Fashion-MNIST).
net = nn.Sequential(nn.Flatten(), nn.Linear(784, 10))

print("模型结构:")
print(net)

# Smoke test with a random batch of three 1x28x28 inputs.
X_test = torch.randn((3, 1, 28, 28))
y_hat = net(X_test)
print("\n测试输入形状:", X_test.shape)
print("模型输出形状:", y_hat.shape)
# The network emits raw scores; softmax over dim 1 turns them into probabilities.
print("预测概率:", y_hat.softmax(dim=1))


=== 3. 模型定义 ===
模型结构:
Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=10, bias=True)
)

测试输入形状: torch.Size([3, 1, 28, 28])
模型输出形状: torch.Size([3, 10])
预测概率: tensor([[0.1013, 0.1063, 0.1911, 0.0875, 0.0558, 0.0928, 0.0711, 0.0874, 0.0679,
         0.1388],
        [0.0524, 0.0703, 0.1627, 0.0727, 0.1238, 0.1463, 0.1098, 0.0629, 0.0809,
         0.1182],
        [0.1267, 0.0956, 0.1349, 0.1417, 0.0603, 0.0814, 0.0555, 0.1283, 0.0903,
         0.0853]], grad_fn=<SoftmaxBackward0>)


In [11]:
print("=== 4. 加载Fashion-MNIST数据 ===")

from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Preprocessing: convert each image to a tensor.
transform = transforms.ToTensor()
# Build the training split first, then the test split, from the same root
# directory (downloaded on demand).
train_data, test_data = (
    datasets.FashionMNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Mini-batch loaders: shuffle only the training split.
batch_size = 256
train_iter = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_iter = DataLoader(test_data, batch_size=batch_size, shuffle=False)

print(f"训练样本数: {len(train_data)}")
print(f"测试样本数: {len(test_data)}")
print(f"批次大小: {batch_size}")
print("数据加载完成!")

=== 4. 加载Fashion-MNIST数据 ===
训练样本数: 60000
测试样本数: 10000
批次大小: 256
数据加载完成!


In [8]:
print("=== 5. 训练Softmax回归模型 ===")

import torch.optim as optim

# Fresh model: flatten each image to 784 features, one linear layer to 10 classes.
net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 10)
)

# Cross-entropy loss on the raw scores, optimised with plain SGD.
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1)

# Training loop
epochs = 5
for epoch in range(epochs):
    # Accumulate per-sample totals rather than per-batch means: the final
    # batch can be smaller than the others, so averaging batch means would
    # over-weight its samples in the reported epoch metrics.
    total_loss, total_correct, total_samples = 0.0, 0, 0
    for X, y in train_iter:
        # Forward pass
        y_hat = net(X)
        loss = loss_fn(y_hat, y)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        num_in_batch = y.size(0)
        # loss is the batch mean, so scale it back to a batch sum.
        total_loss += loss.item() * num_in_batch
        total_correct += (y_hat.argmax(dim=1) == y).sum().item()
        total_samples += num_in_batch

    print(f'Epoch {epoch+1}/{epochs}, Loss: {total_loss/total_samples:.4f}, Acc: {total_correct/total_samples:.3f}')

print("训练完成!")

=== 5. 训练Softmax回归模型 ===
Epoch 1/5, Loss: 0.7853, Acc: 0.750
Epoch 2/5, Loss: 0.5699, Acc: 0.814
Epoch 3/5, Loss: 0.5256, Acc: 0.826
Epoch 4/5, Loss: 0.5015, Acc: 0.832
Epoch 5/5, Loss: 0.4858, Acc: 0.836
训练完成!


In [12]:
print("=== 6. 模型评估 ===")

# Put the model in evaluation mode before scoring the test split.
net.eval()
test_correct = 0
test_total = 0

# No gradients are needed while scoring.
with torch.no_grad():
    for X, y in test_iter:
        y_hat = net(X)
        # Predicted class = index of the largest score in each row.
        hits = y_hat.argmax(dim=1) == y
        test_correct += hits.sum().item()
        test_total += y.shape[0]

print(f"测试集准确率: {test_correct/test_total:.3f}")

# Human-readable names for the 10 class indices, in label order.
fashion_classes = [
    'T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat',
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot',
]
print("\n类别名称:", fashion_classes)

=== 6. 模型评估 ===
测试集准确率: 0.817

类别名称: ['T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
