In [1]:
import torch
import numpy as np
import random
from IPython import display
from matplotlib import pyplot as plt
from torch import nn
import torch.utils.data as Data
import torch.optim as optim
from torch.nn import init
import torchvision
import torchvision.transforms as transforms
import os
from tqdm import tqdm
from copy import deepcopy
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"  #防止jupyter崩溃

In [2]:
#下载MNIST手写数据集
mnist_train = torchvision.datasets.MNIST(root='./Datasets/MNIST', train=True,
download=True, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.MNIST(root='./Datasets/MNIST', train=False,
download=True, transform=transforms.ToTensor())
#读取数据
batch_size = 32
train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True,
num_workers=0)

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [3]:
# 3、构建模型
num_inputs = 784
num_outputs = 10  # 共10类
num_hiddens = 256


class FlattenLayer(torch.nn.Module):  # Flatten层
    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, x):
        return x.view(x.shape[0], -1)


class SoftmaxLayer(torch.nn.Module):
    def __init__(self):
        super(SoftmaxLayer, self).__init__()

    def forward(self, X):
        X_exp = X.exp()  # 对每个元素做指数运算
        partition = X_exp.sum(dim=1, keepdim=True)  # 求列和，即对同行元素求和 n*1
        return X_exp / partition  # broadcast


net = torch.nn.Sequential(
    FlattenLayer(),
    torch.nn.Linear(num_inputs, num_hiddens),
    # 下面是三种可选用的激活函数
    torch.nn.ReLU(),  # Relu激活函数
    # torch.nn.Softplus(),  # Softplus激活函数
    # torch.nn.Tanh(),  # Tanh激活函数
    torch.nn.Linear(num_hiddens, num_outputs),
    SoftmaxLayer(),
)

In [4]:
# 4、初始化模型参数
for params in net.parameters():  # 对网络中的每个参数
    torch.nn.init.normal_(params, mean=0, std=0.01)  # 初始化为服从均值0标准差0.01正态分布


In [5]:
# 5、损失函数与优化器
num_epochs = 10  # 训练轮次
lr = 0.1
loss = torch.nn.CrossEntropyLoss()  # 交叉熵损失函数
optimizer = torch.optim.SGD(net.parameters(), lr)


In [6]:
# 评估函数
def evaluate(data_iter, net):
    right_sum, n, loss_sum = 0.0, 0, 0.0
    for x, y in data_iter:
        y_ = net(x)
        l = loss(y_, y).sum()
        right_sum += (y_.argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
        loss_sum += l.item()
    return right_sum / n, loss_sum / n


In [7]:
def get_kfold_data(k, i, data):  # 获取第i+1（i=0~k-1）折的训练集和验证集
    # train_features = mnist_train.data  # 训练集特征数据
    # train_labels = mnist_train.targets  # 训练集标签数据
    fold_size = data.targets.shape[0] // k  # 每份数据个数
    valid_data = deepcopy(data)
    train_data = deepcopy(data)
    start_ = i*fold_size
    if i != k-1:
        end_ = (i+1)*fold_size
        valid_data.data = valid_data.data[start_:end_]  # 验证集
        valid_data.targets = valid_data.targets[start_:end_]  # 验证集
        train_data.data = torch.cat((train_data.data[0:start_], train_data.data[end_:]), dim=0)  # cat拼接
        train_data.targets = torch.cat((train_data.targets[0:start_], train_data.targets[end_:]), dim=0)  # cat拼接
    else:  # 是最后一折
        valid_data.data, valid_data.targets = valid_data.data[start_:], valid_data.targets[start_:]  #
        train_data.data, train_data.targets = train_data.data[0:start_], train_data.targets[0:start_]
    return train_data, valid_data


In [8]:

def k_train(net, train_data, valid_data):
    train_iter = Data.DataLoader(
        dataset=train_data,  # torch TensorDataset format
        batch_size=batch_size,
        shuffle=True,  # 是否打乱数据
        num_workers=0,  # 多线程来读数据，在Win下需要设置为0
    )
    valid_iter = Data.DataLoader(
        dataset=valid_data,
        batch_size=batch_size,
        shuffle=False,
        num_workers=0,
    )

    train_acc, train_l = 0.0, 0.0
    valid_acc, valid_l = 0.0, 0.0

    optimizer = torch.optim.SGD(net.parameters(), lr=lr)

    for epoch in range(num_epochs):
        train_r_num, train_l_, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            # optimizer.step()
            optimizer.step()
            # optimizer.zero_grad()
            optimizer.zero_grad()
            train_r_num += (y_hat.argmax(dim=1) == y).sum().item()
            train_l_ += l.item()
            n += y.shape[0]
        v_acc, v_l = evaluate(valid_iter, net)
        valid_acc += v_acc
        valid_l += v_l
        train_acc += train_r_num / n
        train_l += train_l_ / n
    return train_l/num_epochs, valid_l/num_epochs, train_acc/num_epochs, valid_acc/num_epochs


def kfold_train(k):
    train_loss_sum, valid_loss_sum = 0, 0
    train_acc_sum, valid_acc_sum = 0, 0
    for i in range(k):
        print('第', i+1, '折验证')
        train_data, valid_data = get_kfold_data(k, i, mnist_train)
        net_ = torch.nn.Sequential(
            FlattenLayer(),
            torch.nn.Linear(num_inputs, num_hiddens),
            torch.nn.ReLU(),  # Relu激活函数
            torch.nn.Linear(num_hiddens, num_outputs),
            SoftmaxLayer(),
        )
        for params in net_.parameters():  # 对网络中的每个参数
            torch.nn.init.normal_(params, mean=0, std=0.01)  # 初始化为服从均值0标准差0.01正态分布
            
        train_loss, val_loss, train_acc, val_acc = k_train(net_, train_data, valid_data)
        print('train loss %.4f, val loss %.4f, train acc %.3f, val acc %.3f' % (train_loss, val_loss, train_acc, val_acc))

        train_loss_sum += train_loss
        valid_loss_sum += val_loss
        train_acc_sum += train_acc
        valid_acc_sum += val_acc
    print('\n最终k折交叉验证结果：')
    print('ave train loss: %.4f, ave train acc: %.3f' % (train_loss_sum/k, train_acc_sum/k))
    print('ave valid loss: %.4f, ave valid acc: %.3f' % (valid_loss_sum/k, valid_acc_sum/k))

kfold_train(10)


第 1 折验证
train loss 0.0499, val loss 0.0488, train acc 0.882, val acc 0.911
第 2 折验证
train loss 0.0499, val loss 0.0492, train acc 0.880, val acc 0.898
第 3 折验证
train loss 0.0493, val loss 0.0489, train acc 0.898, val acc 0.908
第 4 折验证
train loss 0.0499, val loss 0.0491, train acc 0.878, val acc 0.903
第 5 折验证
train loss 0.0495, val loss 0.0490, train acc 0.892, val acc 0.907
第 6 折验证
train loss 0.0494, val loss 0.0488, train acc 0.895, val acc 0.911
第 7 折验证
train loss 0.0497, val loss 0.0491, train acc 0.888, val acc 0.905
第 8 折验证
train loss 0.0494, val loss 0.0489, train acc 0.894, val acc 0.911
第 9 折验证
train loss 0.0496, val loss 0.0491, train acc 0.891, val acc 0.903
第 10 折验证
train loss 0.0494, val loss 0.0481, train acc 0.895, val acc 0.936

最终k折交叉验证结果：
ave train loss: 0.0496, ave train acc: 0.889
ave valid loss: 0.0489, ave valid acc: 0.909
