In [1]:
import numpy as np
import torch
from torch.autograd import Variable
from torch import nn
import matplotlib.pyplot as plt

In [2]:
class simpleNet(nn.Module):
    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(simpleNet, self).__init__()
        self.layer1 = nn.Linear(in_dim, n_hidden_1)
        self.layer2 = nn.Linear(n_hidden_1, n_hidden_2)
        self.layer3 = nn.Linear(n_hidden_2, out_dim)
        
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x

In [3]:
class activationNet(nn.Module):
    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(activationNet, self).__init__()
        self.layer1 = nn.Sequential(nn.Linear(in_dim, n_hidden_1), nn.ReLU(inplace=True))
        self.layer2 = nn.Sequential(nn.Linear(n_hidden_1, n_hidden_2), nn.ReLU(inplace=True))
        self.layer3 = nn.Sequential(nn.Linear(n_hidden_2, out_dim))
        
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x

In [4]:
class batchNet(nn.Module):
    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(batchNet, self).__init__()
        self.layer1 = nn.Sequential(nn.Linear(in_dim, n_hidden_1),
                                    nn.BatchNorm1d(n_hidden_1),
                                    nn.ReLU(inplace=True))
        self.layer2 = nn.Sequential(nn.Linear(n_hidden_1, n_hidden_2),
                                    nn.BatchNorm1d(n_hidden_2),
                                    nn.ReLU(inplace=True))
        self.layer3 = nn.Sequential(nn.Linear(n_hidden_2, out_dim))
        
    def forward(self, x):
        x = layer1(x)
        x = layer2(x)
        x = layer3(x)
        return x

In [5]:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [6]:
batch_size = 64
learning_rate = 1e-2
num_epoches = 20

In [22]:
def data_tf(x):
    x = np.array(x, dtype='float32') / 255
    x = (x - 0.5) / 0.5
    x = x.reshape((-1,))
    x = torch.from_numpy(x)
    return x

In [31]:
from torchvision.datasets import mnist
train_set = mnist.MNIST(root='./mnist', train=True, transform=data_tf, download=True)
test_set = mnist.MNIST(root='./mnist', train=False, transform=data_tf, download=True)

In [32]:
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [15]:
criterion = nn.CrossEntropyLoss()

In [25]:
def train_model(model, optimizer):
    losses = []
    acces = []
    eval_losses = []
    eval_acces = []
    for e in range(num_epoches):
        train_loss = 0
        train_acc = 0
        model.train()
        for img, label in train_loader:
            img = Variable(img)
            label = Variable(label)
            out = model(img)
            loss = criterion(out, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            _, pred = out.max(1)
            num_correct = (pred == label).sum().item()
            acc = num_correct / img.shape[0]
            train_acc += acc
        
        losses.append(train_loss / len(train_dataset))
        acces.append(train_acc / len(train_dataset))
        
        eval_loss = 0
        eval_acc = 0
        model.eval()
        for img, label in test_loader:
            img = Variable(img)
            label = Variable(label)
            out = model(img)
            loss = criterion(out, label)
            eval_loss += loss.item()
            _, pred = out.max(1)
            num_correct = (pred == label).sum().item()
            acc = num_correct / img.shape[0]
            eval_acc += acc
            
        eval_losses.append(eval_loss / len(test_dataset))
        eval_acces.append(eval_acc / len(test_dataset))
        
        print('*'*10)
        print('Epoch {}'.format(e+1))
        print('Train Loss: {:.6f}, Train Acc: {:.6f}'.format(train_loss/len(train_dataset), train_acc/len(train_dataset)))
        print('Eval Loss: {:.6f}, Eval Acc: {:.6f}'.format(eval_loss/len(test_dataset), eval_acc/len(test_dataset)))

In [17]:
model1 = simpleNet(28*28, 300, 100, 10)
optimizer1 = torch.optim.SGD(model1.parameters(), lr=learning_rate)

In [18]:
train_model(model=model1, optimizer=optimizer1)

**********
Epoch 1
Train Loss: 0.011857, Train Acc: 0.012666
Eval Loss: 0.006006, Eval Acc: 0.013984
**********
Epoch 2
Train Loss: 0.005675, Train Acc: 0.013987
Eval Loss: 0.005095, Eval Acc: 0.014239
**********
Epoch 3
Train Loss: 0.005133, Train Acc: 0.014143
Eval Loss: 0.004743, Eval Acc: 0.014341
**********
Epoch 4
Train Loss: 0.004898, Train Acc: 0.014226
Eval Loss: 0.004783, Eval Acc: 0.014373
**********
Epoch 5
Train Loss: 0.004752, Train Acc: 0.014272
Eval Loss: 0.004551, Eval Acc: 0.014358
**********
Epoch 6
Train Loss: 0.004645, Train Acc: 0.014317
Eval Loss: 0.004533, Eval Acc: 0.014387
**********
Epoch 7
Train Loss: 0.004585, Train Acc: 0.014333
Eval Loss: 0.004467, Eval Acc: 0.014419
**********
Epoch 8
Train Loss: 0.004520, Train Acc: 0.014334
Eval Loss: 0.004520, Eval Acc: 0.014412
**********
Epoch 9
Train Loss: 0.004477, Train Acc: 0.014361
Eval Loss: 0.004359, Eval Acc: 0.014452
**********
Epoch 10
Train Loss: 0.004426, Train Acc: 0.014373
Eval Loss: 0.004472, Eval Acc

In [26]:
model2 = activationNet(28*28, 300, 100, 10)
optimizer2 = torch.optim.SGD(model2.parameters(), lr=learning_rate)

In [27]:
train_model(model=model2, optimizer=optimizer2)

**********
Epoch 1
Train Loss: 0.015234, Train Acc: 0.011676
Eval Loss: 0.006725, Eval Acc: 0.013787
**********
Epoch 2
Train Loss: 0.005916, Train Acc: 0.013925
Eval Loss: 0.005377, Eval Acc: 0.014113
**********
Epoch 3
Train Loss: 0.005017, Train Acc: 0.014160
Eval Loss: 0.004567, Eval Acc: 0.014355
**********
Epoch 4
Train Loss: 0.004508, Train Acc: 0.014309
Eval Loss: 0.004241, Eval Acc: 0.014450
**********
Epoch 5
Train Loss: 0.004100, Train Acc: 0.014442
Eval Loss: 0.003810, Eval Acc: 0.014583
**********
Epoch 6
Train Loss: 0.003750, Train Acc: 0.014543
Eval Loss: 0.003478, Eval Acc: 0.014702
**********
Epoch 7
Train Loss: 0.003433, Train Acc: 0.014638
Eval Loss: 0.003384, Eval Acc: 0.014716
**********
Epoch 8
Train Loss: 0.003141, Train Acc: 0.014718
Eval Loss: 0.002993, Eval Acc: 0.014834
**********
Epoch 9
Train Loss: 0.002891, Train Acc: 0.014795
Eval Loss: 0.002839, Eval Acc: 0.014872
**********
Epoch 10
Train Loss: 0.002653, Train Acc: 0.014860
Eval Loss: 0.002542, Eval Acc

In [29]:
model = nn.Sequential(nn.Linear(784, 400),
                      nn.ReLU(),
                      nn.Linear(400, 200),
                      nn.ReLU(), 
                      nn.Linear(200, 100),
                      nn.ReLU(),
                      nn.Linear(100, 10))
opt = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [30]:
train_model(model, opt)

**********
Epoch 1
Train Loss: 0.020457, Train Acc: 0.009917
Eval Loss: 0.008302, Eval Acc: 0.013234
**********
Epoch 2
Train Loss: 0.006672, Train Acc: 0.013671
Eval Loss: 0.006149, Eval Acc: 0.013822
**********
Epoch 3
Train Loss: 0.005175, Train Acc: 0.014130
Eval Loss: 0.004535, Eval Acc: 0.014384
**********
Epoch 4
Train Loss: 0.004483, Train Acc: 0.014313
Eval Loss: 0.004016, Eval Acc: 0.014544
**********
Epoch 5
Train Loss: 0.003959, Train Acc: 0.014480
Eval Loss: 0.003721, Eval Acc: 0.014577
**********
Epoch 6
Train Loss: 0.003516, Train Acc: 0.014605
Eval Loss: 0.003278, Eval Acc: 0.014742
**********
Epoch 7
Train Loss: 0.003127, Train Acc: 0.014722
Eval Loss: 0.003027, Eval Acc: 0.014803
**********
Epoch 8
Train Loss: 0.002794, Train Acc: 0.014826
Eval Loss: 0.002751, Eval Acc: 0.014859
**********
Epoch 9
Train Loss: 0.002517, Train Acc: 0.014887
Eval Loss: 0.003289, Eval Acc: 0.014719
**********
Epoch 10
Train Loss: 0.002294, Train Acc: 0.014962
Eval Loss: 0.002228, Eval Acc

In [36]:
train_data = DataLoader(train_set, batch_size=64, shuffle=True)
test_data = DataLoader(test_set, batch_size=128, shuffle=False)
# 开始训练
losses = []
acces = []
eval_losses = []
eval_acces = []

for e in range(20):
    train_loss = 0
    train_acc = 0
    model.train()
    for im, label in train_data:
        im = Variable(im)
        label = Variable(label)
        # 前向传播
        out = model(im)
        loss = criterion(out, label)
        # 反向传播
        opt.zero_grad()
        loss.backward()
        opt.step()
        # 记录误差
        train_loss += loss.data[0]
        # 计算分类的准确率
        _, pred = out.max(1)
        num_correct = (pred == label.item()).sum().item()
        acc = num_correct / im.shape[0]
        train_acc += acc
        
    losses.append(train_loss / len(train_data))
    acces.append(train_acc / len(train_data))
    # 在测试集上检验效果
    eval_loss = 0
    eval_acc = 0
    model.eval() # 将模型改为预测模式
    for im, label in test_data:
        im = Variable(im)
        label = Variable(label)
        out = model(im)
        loss = criterion(out, label)
        # 记录误差
        eval_loss += loss.data[0]
        # 记录准确率
        _, pred = out.max(1)
        num_correct = (pred == label.item()).sum().item()
        acc = num_correct / im.shape[0]
        eval_acc += acc
        
    eval_losses.append(eval_loss / len(test_data))
    eval_acces.append(eval_acc / len(test_data))
    print('epoch: {}, Train Loss: {:.6f}, Train Acc: {:.6f}, Eval Loss: {:.6f}, Eval Acc: {:.6f}'
          .format(e, train_loss / len(train_data), train_acc / len(train_data), 
                     eval_loss / len(test_data), eval_acc / len(test_data)))




ValueError: only one element tensors can be converted to Python scalars