## 读取数据

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import gzip
import os
import struct
import numpy as np

def load_mnist(path, kind='train'):
    """加载MNIST数据集"""
    labels_path = os.path.join(path, f'{kind}-labels-idx1-ubyte.gz')
    images_path = os.path.join(path, f'{kind}-images-idx3-ubyte.gz')

    with gzip.open(labels_path, 'rb') as lbpath:
        struct.unpack('>II', lbpath.read(8))
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8)

    with gzip.open(images_path, 'rb') as imgpath:
        struct.unpack('>IIII', imgpath.read(16))
        images = np.frombuffer(imgpath.read(), dtype=np.uint8).reshape(len(labels),1,28,28)

    return images, labels



# 数据集划分
def data_split(images, labels, ratio):
    
    total_len = images.shape[0]
    offset = int(total_len * ratio)

    val_img = images[:offset][:]
    val_lb = labels[:offset]

    train_img = images[offset:][:]
    train_lb = labels[offset:]

    return train_img, train_lb, val_img, val_lb    

# 读取训练集和测试集数据
[images, labels] = load_mnist('./MNIST', kind='train')
[test_img, test_lb] = load_mnist('./MNIST',kind='test')
train_img, train_lb, val_img, val_lb = data_split(images, labels, 1/6)

# 为了加快调试速度，从训练集选择2000个样本。
random_numbers = np.random.randint(50000, size=(10000, ))
train_img=train_img[random_numbers]
train_lb= train_lb[random_numbers]

# 将所有数据归一化到0-1之间
train_img =train_img/255.
val_img   =val_img/255.
test_img  =test_img/255.

# 对标签进行热编码
one_hot_train_lb = np.eye(10)[train_lb]
one_hot_val_lb = np.eye(10)[val_lb]
one_hot_test_lb= np.eye(10)[test_lb]

# 打印查看数据集格式
print('训练集图像格式为:', train_img.shape, '训练集标签格式为:', train_lb.shape,'热编码训练集标签格式为:', one_hot_train_lb.shape)
print('验证集图像格式为:', val_img.shape, '验证集标签格式为:', val_lb.shape,'热编码验证集标签格式为:', one_hot_val_lb.shape)
print('测试集图像格式为:', test_img.shape, '测试集标签格式为:', test_lb.shape,'热编码测试集标签格式为:', one_hot_test_lb.shape)

训练集图像格式为: (10000, 1, 28, 28) 训练集标签格式为: (10000,) 热编码训练集标签格式为: (10000, 10)
验证集图像格式为: (10000, 1, 28, 28) 验证集标签格式为: (10000,) 热编码验证集标签格式为: (10000, 10)
测试集图像格式为: (10000, 1, 28, 28) 测试集标签格式为: (10000,) 热编码测试集标签格式为: (10000, 10)


## 数据增强处理

In [2]:

from torchvision.transforms import v2
from torchvision.io import read_image
train_img1=torch.tensor(train_img, dtype=torch.float32)

transforms = v2.Compose([
    v2.RandomHorizontalFlip(p=0.5),
    v2.Normalize(mean=[0.485], std=[0.229]),
    v2.RandomRotation(degrees=(-45, 45))
    ])

augmented_sample = transforms(train_img1)
train_img = torch.cat((augmented_sample,train_img1))
one_hot_train_lb  = np.concatenate((one_hot_train_lb,one_hot_train_lb),axis=0)
print(train_img.shape)

torch.Size([20000, 1, 28, 28])


In [3]:
np.savez("dataset",train_img,one_hot_train_lb,test_img,one_hot_train_lb)

程序改错，构建并训练一个卷积网络

In [3]:
#网络包含3个卷积层、2个线性层、3个BN层
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1=nn.Conv2d(in_channels=1,out_channels=10, kernel_size=5)
        self.conv2=nn.Conv2d(in_channels=10,out_channels=20,kernel_size=5)
        self.conv3=nn.Conv2d(in_channels=20,out_channels=10,kernel_size=5)
        self.w1 =nn.Linear(16*16*10,100)
        self.w2 =nn.Linear(100,10)
        self.BN1=nn.BatchNorm2d(10)
        self.BN2=nn.BatchNorm2d(20)
        self.BN3=nn.BatchNorm2d(10)
        self.relu=nn.ReLU()
        #self.drop=nn.Dropout()


    def forward(self, x):
        x = self.conv1 (x)
        x = self.BN1(x)
        x = self.relu(x)
        #x= self.drop(x)
        
        x = self.conv2 (x)
        x = self.BN2(x)
        x = self.relu(x)

        #x=self.drop(x)
        
        x = self.conv3 (x)
        x = self.BN3(x)
        x = self.relu(x)
        #x=self.drop(x)
        
        x = x.view(x.size(0), -1)
        x = self.w1 (x)
        x = self.relu(x)

        #x=self.drop(x)
        
        x = self.w2 (x)
        x = self.relu(x)        
        #x=self.drop(x)
        return x

model = NeuralNetwork()

# Initialize the loss function
loss_fn = nn.CrossEntropyLoss()

learning_rate = 5e-3
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

batch_size = 200
epochs = 10
batch_num=int(train_img.shape[0]/batch_size)
size = len(train_img)

model.train()
for t in range(epochs):
    
    correct=0.
    train_mean_loss=0.

    for batch in range(batch_num):
        X=train_img[batch*batch_size:(batch+1)*batch_size,]
        y=one_hot_train_lb[batch*batch_size:(batch+1)*batch_size,:]

        X=torch.tensor(X, dtype=torch.float32)
        y=torch.tensor(y, dtype=torch.float32)
        
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        correct += (pred.argmax(1) == y.argmax(1)).type(torch.float).mean().item()
        train_mean_loss+= loss.item()

    train_mean_loss /= batch_num
    correct /= batch_num
    
    print(f" Epoch:{t+1}, loss: {train_mean_loss:>8f},  Accuracy: {(100*correct):>0.1f}%")


  X=torch.tensor(X, dtype=torch.float32)


 Epoch:1, loss: 2.014015,  Accuracy: 40.6%
 Epoch:2, loss: 1.265292,  Accuracy: 68.5%
 Epoch:3, loss: 0.731226,  Accuracy: 82.1%
 Epoch:4, loss: 0.468146,  Accuracy: 89.2%
 Epoch:5, loss: 0.343721,  Accuracy: 91.8%
 Epoch:6, loss: 0.274751,  Accuracy: 93.3%
 Epoch:7, loss: 0.231780,  Accuracy: 94.2%
 Epoch:8, loss: 0.202473,  Accuracy: 94.7%
 Epoch:9, loss: 0.181034,  Accuracy: 95.3%
 Epoch:10, loss: 0.164643,  Accuracy: 95.6%


In [4]:
model.eval()
test_loss, correct = 0, 0
with torch.no_grad():
        X=torch.tensor(test_img, dtype=torch.float32)
        y=torch.tensor(one_hot_test_lb, dtype=torch.float32)
        pred = model(X)
        test_loss = np.mean(loss_fn(pred, y).item())
        correct = (pred.argmax(1) == y.argmax(1)).type(torch.float).mean().item()

print(f"Test Accuracy: {(100*correct):>0.1f}%, Test Avg loss: {test_loss:>8f} \n")


Test Accuracy: 96.2%, Test Avg loss: 0.145072 

