In [1]:
import os

def print_directory_structure(path):
    print(f"分析目录: {path}")
    for root, dirs, files in os.walk(path):
        level = root.replace(path, '').count(os.sep)
        indent = ' ' * 4 * level
        print(f"{indent}{os.path.basename(root)}/")
        subindent = ' ' * 4 * (level + 1)
        for f in files[:5]:  # 只显示前5个文件
            print(f"{subindent}{f}")
        if len(files) > 5:
            print(f"{subindent}...")

print_directory_structure("minist_dataset")

分析目录: minist_dataset
minist_dataset/
    minist_result.csv
    our_testing_without_labels/
        16585.png
        8820.png
        14392.png
        9280.png
        15932.png
        ...
    training/
        9/
            36655.png
            32433.png
            28319.png
            4968.png
            23502.png
            ...
        0/
            16585.png
            24537.png
            25629.png
            20751.png
            34730.png
            ...
        7/
            30224.png
            27758.png
            29775.png
            26446.png
            15932.png
            ...
        6/
            59292.png
            17843.png
            45863.png
            8820.png
            9280.png
            ...
        1/
            10692.png
            30556.png
            10686.png
            58198.png
            16591.png
            ...
        8/
            20037.png
            34042.png
            348.png
            12085.png
            3235

In [2]:
import os
import numpy as np
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

class MNISTDataset(Dataset):
    def __init__(self, root_dir, is_train=True):
        self.root_dir = root_dir
        self.is_train = is_train
        self.transform = transforms.Compose([
            transforms.Grayscale(),  # 转换为灰度图
            transforms.Resize((28, 28)),  # MNIST标准大小
            transforms.ToTensor(),  # 转换为tensor
            transforms.Normalize((0.5,), (0.5,))  # 标准化
        ])
        
        self.data = []
        if is_train:
            # 训练集: 从training/目录加载
            for label in range(10):
                label_dir = os.path.join(root_dir, "training", str(label))
                for img_name in os.listdir(label_dir):
                    self.data.append((os.path.join(label_dir, img_name), label))
        else:
            # 测试集: 从our_testing_without_labels/目录加载
            test_dir = os.path.join(root_dir, "our_testing_without_labels")
            for img_name in os.listdir(test_dir):
                self.data.append((os.path.join(test_dir, img_name), img_name))
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        img_path, label = self.data[idx]
        image = Image.open(img_path)
        image = self.transform(image)
        
        if self.is_train:
            return image, label
        else:
            return image, label  # 测试集返回图片名称而不是标签

# 测试数据加载
def test_dataset():
    # 创建训练集
    train_dataset = MNISTDataset("minist_dataset", is_train=True)
    print(f"训练集大小: {len(train_dataset)}")
    
    # 创建测试集
    test_dataset = MNISTDataset("minist_dataset", is_train=False)
    print(f"测试集大小: {len(test_dataset)}")
    
    # 测试一个样本
    image, label = train_dataset[0]
    print(f"图片张量形状: {image.shape}")
    print(f"标签: {label}")

test_dataset()

训练集大小: 60000
测试集大小: 10000
图片张量形状: torch.Size([1, 28, 28])
标签: 0


In [3]:
import torch.nn as nn
import torch.nn.functional as F

class MNISTNet(nn.Module):
    def __init__(self):
        super(MNISTNet, self).__init__()
        # 第一个卷积层：1通道输入，32通道输出，3x3卷积核
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        # 第二个卷积层：32通道输入，64通道输出，3x3卷积核
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 全连接层
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
        # Dropout层，防止过拟合
        self.dropout = nn.Dropout(0.5)
        
    def forward(self, x):
        # 第一个卷积块
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)  # 14x14
        
        # 第二个卷积块
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)  # 7x7
        
        # 展平
        x = x.view(-1, 64 * 7 * 7)
        
        # 全连接层
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        
        return F.log_softmax(x, dim=1)

# 测试模型
def test_model():
    # 创建模型
    model = MNISTNet()
    
    # 创建一个测试输入
    test_input = torch.randn(1, 1, 28, 28)
    
    # 前向传播
    output = model(test_input)
    
    print(f"模型输出形状: {output.shape}")
    print(f"模型参数总量: {sum(p.numel() for p in model.parameters())}")
    
    # 打印模型结构
    print("\n模型结构:")
    print(model)

test_model()

模型输出形状: torch.Size([1, 10])
模型参数总量: 421642

模型结构:
MNISTNet(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=3136, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)


In [4]:
import torch.optim as optim
from torch.utils.data import DataLoader
import time

def train_model():
    # 设置设备
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"使用设备: {device}")
    
    # 创建模型和数据加载器
    model = MNISTNet().to(device)
    train_dataset = MNISTDataset("minist_dataset", is_train=True)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    
    # 定义损失函数和优化器
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    # 训练参数
    n_epochs = 5
    
    # 训练循环
    print("开始训练...")
    for epoch in range(n_epochs):
        model.train()
        total_loss = 0
        start_time = time.time()
        
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            
            # 前向传播
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            
            # 反向传播
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
            
            # 打印进度
            if batch_idx % 100 == 0:
                print(f'Epoch {epoch+1}/{n_epochs} [{batch_idx * len(data)}/{len(train_loader.dataset)} '
                      f'({100. * batch_idx / len(train_loader):.0f}%)]\t'
                      f'Loss: {loss.item():.6f}')
        
        # 打印每个epoch的统计信息
        epoch_time = time.time() - start_time
        avg_loss = total_loss / len(train_loader)
        print(f'Epoch {epoch+1}/{n_epochs} 完成 - '
              f'平均损失: {avg_loss:.6f}, '
              f'用时: {epoch_time:.2f}秒')
    
    print("训练完成！")
    
    # 保存模型
    torch.save(model.state_dict(), 'mnist_model.pth')
    print("模型已保存为 mnist_model.pth")

train_model()

使用设备: cpu
开始训练...
Epoch 1/5 完成 - 平均损失: 0.266055, 用时: 37.48秒
Epoch 2/5 完成 - 平均损失: 0.098256, 用时: 37.95秒
Epoch 3/5 完成 - 平均损失: 0.075076, 用时: 38.22秒
Epoch 4/5 完成 - 平均损失: 0.061636, 用时: 36.91秒
Epoch 5/5 完成 - 平均损失: 0.049373, 用时: 36.67秒
训练完成！
模型已保存为 mnist_model.pth


In [5]:
import pandas as pd

def predict_and_save():
    # 设置设备
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # 加载模型
    model = MNISTNet().to(device)
    model.load_state_dict(torch.load('mnist_model.pth'))
    model.eval()
    
    # 加载测试数据
    test_dataset = MNISTDataset("minist_dataset", is_train=False)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
    
    # 预测结果
    results = []
    print("开始预测...")
    
    with torch.no_grad():
        for data, img_names in test_loader:
            data = data.to(device)
            output = model(data)
            pred = output.max(1, keepdim=True)[1]
            
            # 保存预测结果
            for idx, p in enumerate(pred):
                results.append({
                    'image_name': img_names[idx],
                    'predicted_num': p.item()
                })
    
    # 创建DataFrame并保存为CSV
    df = pd.DataFrame(results)
    df.to_csv('minist_dataset/minist_result.csv', index=False)
    
    print(f"预测完成！共预测{len(results)}张图片")
    print("预测结果已保存到 minist_dataset/minist_result.csv")
    
    # 显示前几个预测结果
    print("\n前5个预测结果:")
    print(df.head())

predict_and_save()

  model.load_state_dict(torch.load('mnist_model.pth'))


开始预测...
预测完成！共预测10000张图片
预测结果已保存到 minist_dataset/minist_result.csv

前5个预测结果:
  image_name  predicted_num
0  16585.png              2
1   8820.png              7
2  14392.png              8
3   9280.png              9
4  15932.png              1


In [6]:
import torch
from torch.utils.data import DataLoader
import numpy as np

def evaluate_on_training():
    # 设置设备
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # 加载模型
    model = MNISTNet().to(device)
    model.load_state_dict(torch.load('mnist_model.pth'))
    model.eval()
    
    # 加载训练数据
    train_dataset = MNISTDataset("minist_dataset", is_train=True)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
    
    correct = 0
    total = 0
    predictions = []
    true_labels = []
    
    print("开始评估...")
    
    with torch.no_grad():
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.max(1, keepdim=True)[1].squeeze()
            
            correct += pred.eq(target).sum().item()
            total += target.size(0)
            
            predictions.extend(pred.cpu().numpy())
            true_labels.extend(target.cpu().numpy())
    
    accuracy = 100. * correct / total
    print(f"\n模型在训练集上的准确率: {accuracy:.2f}%")
    
    # 计算每个数字的准确率
    for digit in range(10):
        mask = np.array(true_labels) == digit
        if mask.sum() > 0:
            digit_accuracy = 100. * np.sum(np.array(predictions)[mask] == digit) / mask.sum()
            print(f"数字 {digit} 的准确率: {digit_accuracy:.2f}%")
    
    return predictions, true_labels

evaluate_on_training()

  model.load_state_dict(torch.load('mnist_model.pth'))


开始评估...

模型在训练集上的准确率: 99.17%
数字 0 的准确率: 99.43%
数字 1 的准确率: 98.55%
数字 2 的准确率: 99.51%
数字 3 的准确率: 99.28%
数字 4 的准确率: 98.82%
数字 5 的准确率: 98.76%
数字 6 的准确率: 99.81%
数字 7 的准确率: 98.88%
数字 8 的准确率: 99.64%
数字 9 的准确率: 99.13%


([0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  8,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
