In [4]:
def rmsprop(parameters, sqrs, lr, alpha, eps=1e-10):
    """Apply one RMSProp update step to every parameter, in place.

    For each parameter ``p`` with gradient ``g`` and running average ``s``:

        s <- alpha * s + (1 - alpha) * g ** 2
        p <- p - lr / sqrt(s + eps) * g

    Args:
        parameters: iterable of tensors exposing ``.data`` and ``.grad``
            (e.g. ``net.parameters()``).
        sqrs: iterable of state tensors, paired positionally with
            ``parameters``; each one is overwritten in place with the new
            running average of squared gradients.
        lr: learning rate.
        alpha: decay factor of the running average.
        eps: small constant added *inside* the square root to avoid a
            division by zero. Defaults to the previously hard-coded 1e-10.

    Parameters whose ``.grad`` is ``None`` (never reached by ``backward()``,
    or frozen) are skipped; their state tensor is left untouched.
    """
    for param, sqr in zip(parameters, sqrs):
        if param.grad is None:
            # No gradient was produced for this parameter: nothing to apply.
            continue
        grad = param.grad.data
        # Slice assignment keeps the caller's state tensor object alive so the
        # running average persists across calls.
        sqr[:] = alpha * sqr + (1 - alpha) * grad ** 2
        div = lr / torch.sqrt(sqr + eps) * grad
        param.data = param.data - div

In [5]:
import numpy as np
import torch
from torchvision.datasets import MNIST 
from torch.utils.data import DataLoader
from torch import nn
from torch.autograd import Variable
import time
import matplotlib.pyplot as plt
%matplotlib inline

def data_tf(x):
    """Turn an image-like input into a flat, normalised float32 tensor.

    The input is converted to a float32 array, divided by 255, shifted and
    scaled with ``(v - 0.5) / 0.5`` (so 0 maps to -1 and 255 maps to 1),
    flattened to one dimension and wrapped as a torch tensor.
    """
    scaled = np.array(x, dtype='float32') / 255
    centred = (scaled - 0.5) / 0.5
    flat = centred.reshape((-1,))
    return torch.from_numpy(flat)

# MNIST training and test splits; every image goes through data_tf on access.
# download=True asks torchvision to fetch the files into ./data when absent.
train_set = MNIST(root='./data', train=True, transform=data_tf, download=True)
test_set = MNIST(root='./data', train=False, transform=data_tf, download=True)

# Loss applied to the network's raw outputs and the integer class labels.
criterion = nn.CrossEntropyLoss()

In [6]:
train_data = DataLoader(train_set, batch_size=64, shuffle=True)
# Fully connected classifier: 784 inputs -> 200 hidden units (ReLU) -> 10 outputs.
net = nn.Sequential(
    nn.Linear(784, 200),
    nn.ReLU(),
    nn.Linear(200, 10),
)

# One zero-initialised running average of squared gradients per parameter,
# in the same order as net.parameters() so rmsprop() can zip them together.
sqrs = [torch.zeros_like(param.data) for param in net.parameters()]

losses = []  # loss sampled every 30 iterations
idx = 0      # global iteration counter across epochs
start = time.time()  # start timing
for e in range(5):
    train_loss = 0
    for im, label in train_data:
        # forward
        # (Tensors no longer need wrapping in Variable since PyTorch 0.4.)
        out = net(im)
        loss = criterion(out, label)
        # backward
        net.zero_grad()
        loss.backward()
        rmsprop(net.parameters(), sqrs, 1e-3, 0.9)  # learning rate 0.001, alpha 0.9
        # loss
        # loss is a 0-dim tensor: .item() extracts the Python float, whereas
        # indexing it with [0] raises IndexError on PyTorch >= 0.4.
        batch_loss = loss.item()
        train_loss += batch_loss
        if idx % 30 == 0:
            losses.append(batch_loss)
        idx += 1
    print('epoch: {}, Train Loss: {:.6f}'
          .format(e, train_loss / len(train_data)))
end = time.time()  # stop timing
print('使用时间: {:.5f} s'.format(end - start))



epoch: 0, Train Loss: 0.373040
epoch: 1, Train Loss: 0.171692
epoch: 2, Train Loss: 0.127203
epoch: 3, Train Loss: 0.105141
epoch: 4, Train Loss: 0.089624
使用时间: 23.35725 s


In [7]:
# ---- built-in RMSprop: same network and hyper-parameters, using torch.optim
train_data = DataLoader(train_set, batch_size=64, shuffle=True)
net = nn.Sequential(
    nn.Linear(784, 200),
    nn.ReLU(),
    nn.Linear(200, 10),
)
optimizer = torch.optim.RMSprop(net.parameters(), lr=1e-3, alpha=0.9)

start = time.time()  # start timing
for e in range(5):
    train_loss = 0
    for im, label in train_data:
        # forward
        # (Tensors no longer need wrapping in Variable since PyTorch 0.4.)
        out = net(im)
        loss = criterion(out, label)
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # loss
        # loss is a 0-dim tensor: .item() extracts the Python float, whereas
        # indexing it with [0] raises IndexError on PyTorch >= 0.4.
        train_loss += loss.item()
    print('epoch: {}, Train Loss: {:.6f}'
          .format(e, train_loss / len(train_data)))
end = time.time()  # stop timing
print('time: {:.5f} s'.format(end - start))



epoch: 0, Train Loss: 0.379394
epoch: 1, Train Loss: 0.170528
epoch: 2, Train Loss: 0.123271
epoch: 3, Train Loss: 0.101504
epoch: 4, Train Loss: 0.088230
time: 31.11906 s
