# Dropout丢弃法缓解过拟合(简洁实现)

In [1]:
import collections
import math
import os
import random
import sys
import tarfile
import time
import json
import zipfile
from tqdm import tqdm
from PIL import Image
from collections import namedtuple

from IPython import display
from matplotlib import pyplot as plt
import torch
from torch import nn
from torch.nn import init
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torchtext
import torchtext.vocab as Vocab
import numpy as np


## 获取数据

In [2]:
def load_data_fashion_mnist(batch_size, resize=None, root='data/FashionMNIST'):
    """Download the fashion mnist dataset and then load into memory."""
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())
    
    transform = torchvision.transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)
    if sys.platform.startswith('win'):
        num_workers = 0  # 0表示不用额外的进程来加速读取数据
    else:
        num_workers = 4
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_iter, test_iter

In [3]:
batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size)

## 定义模型网络

In [4]:
class FlattenLayer(nn.Module):
    def __init__(self):
        super(FlattenLayer,self).__init__()
    def forward(self,x):
        return x.view(x.shape[0],-1)

pytorch提供了自动的dropout层函数，在训练时启用，验证时不启用

关于pytorch如何识别是否是训练集：在torchvision读取该数据库时，建立dataset的函数有bool型参数train

所以当train_iter传入数据时判断为训练模式，test_iter传入数据时判断为测试模式

In [9]:
num_inputs,num_hidden1,num_hidden2,num_outputs=784,512,256,10
drop_prob1,drop_prob2=0.2,0.5

net=nn.Sequential(
    FlattenLayer(),
    nn.Linear(num_inputs, num_hidden1),
    nn.ReLU(),
    nn.Dropout(drop_prob1),
    nn.Linear(num_hidden1, num_hidden2),
    nn.ReLU(),
    nn.Dropout(drop_prob2),
    nn.Linear(num_hidden2, num_outputs)
)

for param in net.parameters():
    init.normal_(param,mean=0,std=0.01)
    
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

# 以下为计算测试集准确率
def evaluate_accuracy(data_iter,net):
    acc_sum, n=0.0, 0
    for X,y in data_iter:
        acc_sum+=(net(X).argmax(dim=1)==y).float().sum().item()  #注意这里是sum不是mean 之后会÷n
        n+=y.shape[0]
    return acc_sum / n

## 训练模型

In [14]:
num_epochs = 10
def train(net,train_iter,test_iter,loss,num_epochs):
    
    for epoch in range(num_epochs):
        train_l_sum,train_acc_sum,n=0.0,0.0,0
        
        for X,y in train_iter:
            y_hat=net(X)
            l=loss(y_hat,y).sum()

            # w和b梯度清零
            optimizer.zero_grad()

            # 计算loss函数梯度
            l.backward()
            
            #梯度下降
            optimizer.step()
            
            # loss和精确度加和
            train_l_sum+=l.item()
            train_acc_sum+=(y_hat.argmax(dim=1)==y).sum().item()
            n+=y.shape[0]
        
        test_acc=evaluate_accuracy(test_iter,net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' 
            % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
train (net, train_iter, test_iter, loss, num_epochs)

epoch 1, loss 0.0011, train acc 0.893, test acc 0.863
epoch 2, loss 0.0011, train acc 0.897, test acc 0.859
epoch 3, loss 0.0010, train acc 0.900, test acc 0.859
epoch 4, loss 0.0010, train acc 0.904, test acc 0.835
epoch 5, loss 0.0010, train acc 0.909, test acc 0.843
epoch 6, loss 0.0009, train acc 0.908, test acc 0.874
epoch 7, loss 0.0009, train acc 0.912, test acc 0.864
epoch 8, loss 0.0009, train acc 0.912, test acc 0.882
epoch 9, loss 0.0009, train acc 0.916, test acc 0.877
epoch 10, loss 0.0008, train acc 0.918, test acc 0.850
