In [1]:
%matplotlib inline

# Author: Xiang Wang

from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms, models
import matplotlib.pyplot as plt
import time
import os
import copy

plt.ion()   # interactive mode

## 一、数据处理与加载

由于MNIST数据集较为简单，实际每个框架都自定义了其内置读入的方式，所以这里也就不从0读入，而是直接使用pytorch的内置的函数读入

In [98]:
print('==> Preparing data..')

data_transforms = {
    'train': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])
    ]),

    'test': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])
    ]),
}


# 上面的处理其实也等同于使用下面这个函数，只不过是pytorch为了用户使用方便而封装的东西，上面的操作是先将其转化为张量然后进行归一化操作
# 注意，使用上面的方法时，/255这个操作是隐性的，不需要人为写出操作
# def data_tf(x):
#     x = np.array(x, dtype='float32') / 255
#     x = (x - 0.5) / 0.5 # 标准化，这个技巧之后会讲到
#     x = torch.from_numpy(x)
#     return x

==> Preparing data..


In [102]:
# 使用内置函数下载和读入 mnist 数据集
train_set = datasets.MNIST('/Users/macbook/Documents/Programme/pytorch/Data/Mnist ', train=True, download=True, 
                                 transform=data_transforms['train'])

test_set = datasets.MNIST('/Users/macbook/Documents/Programme/pytorch/Data/Mnist ', train=False, download=True,
                                 transform=data_transforms['test'])


In [104]:
# 我们可以看下数据读入和处理后的样子
data, label = train_set[0]
print('data_shape: ', data.shape, '\nlabel: ', label)

data_shape:  torch.Size([1, 28, 28]) 
label:  tensor(5)


使用DataLoader得到可迭代形式的数据格式

In [105]:
# 得到可batch的数据格式
dataloaders = torch.utils.data.DataLoader(train_set, batch_size=4,
                                             shuffle=True, num_workers=1)

testdataloaders = torch.utils.data.DataLoader(test_set, batch_size=4,
                                             shuffle=False, num_workers=1)

In [106]:
data, label = next(iter(testdataloaders))
print('data_shape: ', data.shape, '\nlabel: ', label)

data_shape:  torch.Size([4, 1, 28, 28]) 
label:  tensor([ 7,  2,  1,  0])


## 二、建立模型

In [73]:
class basicDNN(nn.Module):
    
    def __init__(self, num_classes=10):
        super(basicDNN, self).__init__()
        
        self.model_name = 'DNN'
        
        self.fc = nn.Sequential(
            nn.Linear(28 * 28, 400),
            nn.ReLU(inplace=True),
            
            nn.Linear(400, 200),
            nn.ReLU(inplace=True),
            
            nn.Linear(200, 100),
            nn.ReLU(inplace=True),

            nn.Linear(100, num_classes),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        x = x.view(x.shape[0], -1)
        x = self.fc(x)
        return x

In [77]:
model_ft = basicDNN()

In [78]:
# 查看我们定义的网络结构
print(model_ft)

basicDNN(
  (fc): Sequential(
    (0): Linear(in_features=784, out_features=400, bias=True)
    (1): ReLU(inplace)
    (2): Linear(in_features=400, out_features=200, bias=True)
    (3): ReLU(inplace)
    (4): Linear(in_features=200, out_features=100, bias=True)
    (5): ReLU(inplace)
    (6): Linear(in_features=100, out_features=10, bias=True)
    (7): ReLU(inplace)
  )
)


## 三、设置必要的参数

In [83]:
# 设置为GPU处理
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_ft = model_ft.to(device)

# 设置损失函数
# 交叉熵在 pytorch 中已经内置了，同时不需要label也不需要我们做处理，内置函数会帮我们处理，非常省心
criterion = nn.CrossEntropyLoss()

# 设置优化算法
optimizer = optim.SGD(model_ft.parameters(), lr=0.1)


## 四、开始训练

In [109]:
# 开始训练
losses = []
acces = []
eval_losses = []
eval_acces = []

for e in range(1):
    train_loss = 0
    train_acc = 0
    model_ft.train()
    for im, label in dataloaders:
        im = Variable(im)
        label = Variable(label)
        # 前向传播
        out = model_ft(im)
        loss = criterion(out, label)
        # 反向传播
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # 记录误差
        train_loss += loss.data[0]
        # 计算分类的准确率
        _, pred = out.max(1)
        num_correct = (pred == label).sum().item()
        acc = num_correct / im.shape[0]
        train_acc += acc
    print(e)
  
    losses.append(train_loss / len(dataloaders))
    acces.append(train_acc / len(dataloaders))
    # 在测试集上检验效果
    eval_loss = 0
    eval_acc = 0
    model_ft.eval() # 将模型改为预测模式
    for im, label in testdataloaders:
        im = Variable(im)
        label = Variable(label)
        out = model_ft(im)
        loss = criterion(out, label)
        # 记录误差
        eval_loss += loss.data[0]
        # 记录准确率
        _, pred = out.max(1)
        num_correct = (pred == label).sum().item()
        acc = num_correct / im.shape[0]
        eval_acc += acc
        
    eval_losses.append(eval_loss / len(testdataloaders))
    eval_acces.append(eval_acc / len(testdataloaders))
    print('epoch: {}, Train Loss: {:.6f}, Train Acc: {:.6f}, Eval Loss: {:.6f}, Eval Acc: {:.6f}'
          .format(e, train_loss / len(dataloaders), train_acc / len(dataloaders), 
                     eval_loss / len(testdataloaders), eval_acc / len(testdataloaders)))



0




epoch: 0, Train Loss: 2.302612, Train Acc: 0.098717, Eval Loss: 2.302601, Eval Acc: 0.098000


## 五、模型的保存与加载

In [129]:
torch.save(model_ft, 'save_model.pth')

  "type " + obj.__name__ + ". It won't be checked "


In [130]:
model_ft2 = torch.load('save_model.pth')