In [1]:
import os

import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
import numpy as np
import matplotlib.pyplot as plt

In [2]:
%matplotlib inline

In [3]:
# Pick the compute device once: CUDA when it is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [6]:
# Input layout: every sample is viewed as `sequence_size` time steps of
# `input_size` features each (56 * 14 = 784 values per sample).
sequence_size, input_size = 56, 14

# Model dimensions.
hidden_size, num_layers = 128, 2
num_classes = 10

# Optimisation settings.
batch_size = 32
num_epochs = 5
learning_rate = 0.001

In [7]:
# MNIST train/test splits, converted to tensors by ToTensor().
# download=True fetches the files into ./datas when they are missing, so the
# cell also works on a fresh checkout; files already present are reused.
train_dataset = datasets.MNIST('./datas', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = datasets.MNIST('./datas', train=False, transform=transforms.ToTensor(), download=True)

# Shuffle only the training batches; the test set is iterated in a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [8]:
class Rnn(nn.Module):
    """Sequence classifier: a stacked LSTM followed by a linear layer.

    The linear layer is applied to the LSTM output of the last time step only,
    giving one score per class for every sequence in the batch.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        num_classes: number of scores produced by the final linear layer.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(Rnn, self).__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # batch_first=True: inputs and outputs are laid out as (batch, seq, feature).
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Classify a batch of sequences.

        Args:
            x: tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, num_classes) with unnormalised class scores.
        """
        batch = x.size(0)

        # Allocate the zero initial states from the input itself so they always
        # share its device and dtype, instead of relying on a notebook-level
        # `device` global that may not exist or may not match the model.
        h_0 = x.new_zeros(self.num_layers, batch, self.hidden_size)
        c_0 = x.new_zeros(self.num_layers, batch, self.hidden_size)

        out, _ = self.rnn(x, (h_0, c_0))
        # Keep only the last time step's output for classification.
        return self.fc(out[:, -1, :])

In [9]:
# Build the network from the notebook's hyperparameters and place it on the
# selected device.
model = Rnn(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)
model = model.to(device)

# Cross-entropy loss on the class scores, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [13]:
# Number of batches per epoch, used only for the progress message.
total_setp = len(train_loader)

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # View every image batch as sequences of `sequence_size` steps with
        # `input_size` features per step, then move it to the compute device.
        images = images.view(-1, sequence_size, input_size).to(device)
        labels = labels.to(device)

        outs = model(images)
        loss = criterion(outs, labels)

        # Standard update: clear old gradients, backpropagate, take a step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the current batch loss every 300 steps.
        if (i + 1) % 300 == 0:
            # print(...) with one argument works on both Python 2 and 3;
            # .item() turns the scalar loss tensor into a plain float.
            print('Epoch: [{}/{}], Step: [{:4d}/{}], Loss: {:.6f}'.format(
                epoch + 1, num_epochs, i + 1, total_setp, loss.item()))


Epoch: [1/5], Step: [ 300/1875], Loss: 0.122333
Epoch: [1/5], Step: [ 600/1875], Loss: 0.002006
Epoch: [1/5], Step: [ 900/1875], Loss: 0.169366
Epoch: [1/5], Step: [1200/1875], Loss: 0.026221
Epoch: [1/5], Step: [1500/1875], Loss: 0.082493
Epoch: [1/5], Step: [1800/1875], Loss: 0.193699
Epoch: [2/5], Step: [ 300/1875], Loss: 0.018863
Epoch: [2/5], Step: [ 600/1875], Loss: 0.014684
Epoch: [2/5], Step: [ 900/1875], Loss: 0.002834
Epoch: [2/5], Step: [1200/1875], Loss: 0.035762
Epoch: [2/5], Step: [1500/1875], Loss: 0.014161
Epoch: [2/5], Step: [1800/1875], Loss: 0.001170
Epoch: [3/5], Step: [ 300/1875], Loss: 0.017725
Epoch: [3/5], Step: [ 600/1875], Loss: 0.068726
Epoch: [3/5], Step: [ 900/1875], Loss: 0.028858
Epoch: [3/5], Step: [1200/1875], Loss: 0.036315
Epoch: [3/5], Step: [1500/1875], Loss: 0.007834
Epoch: [3/5], Step: [1800/1875], Loss: 0.002130
Epoch: [4/5], Step: [ 300/1875], Loss: 0.224592
Epoch: [4/5], Step: [ 600/1875], Loss: 0.005650
Epoch: [4/5], Step: [ 900/1875], Loss: 0

In [14]:
# Evaluate on the test set; no_grad() disables gradient tracking for inference.
with torch.no_grad():
    # Float counters keep the final division a true division on Python 2 too.
    acc = 0.    # number of correctly classified samples so far
    total = 0.  # number of samples seen so far
    for images, labels in test_loader:
        images = images.view(-1, sequence_size, input_size).to(device)
        labels = labels.to(device)

        outputs = model(images)

        # The predicted class is the index of the highest score in each row.
        _, pred = torch.max(outputs, 1)
        total += labels.size(0)
        acc += (pred == labels).sum().item()

    # print(...) with one argument works on both Python 2 and 3.
    print('Test: {}%'.format(100 * acc / total))

Test: 98.89%


In [11]:
# Save only the learned parameters (the state_dict) to the checkpoint file.
ckpt_path = './ser/rnn.ckpt'
ckpt_dir = os.path.dirname(ckpt_path)
# Create the target directory first so saving does not fail when it is missing.
# (isdir + makedirs instead of exist_ok=True keeps this valid on Python 2.)
if not os.path.isdir(ckpt_dir):
    os.makedirs(ckpt_dir)
torch.save(model.state_dict(), ckpt_path)