In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision.datasets as dsets
import torchvision.transforms as transforms

In [12]:
# Hyper-parameters

# Input geometry: every image is fed to the LSTM as `sequence_length`
# time steps of `input_size` features each.
input_size = 28
sequence_length = 28

# Network architecture
num_classes = 10
hidden_size = 128
num_layers = 2

# Optimisation settings
learning_rate = 0.01
num_epochs = 2
batch_size = 100

In [3]:
# MNIST training split and its loader.
# The training dataset is created first because it is the one that
# downloads the files into '../data/' when they are missing.
train_dataset = dsets.MNIST('../data/',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

# MNIST test split and its loader (read from the same root, kept in order).
test_dataset = dsets.MNIST('../data/',
                           train=False,
                           transform=transforms.ToTensor())
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# RNN Model (Many-to-One)

In [9]:
# RNN Model (Many-to-One)
class RNN(nn.Module):
    """Stacked LSTM followed by a linear classifier (many-to-one).

    The LSTM reads a batch-first sequence; only its output at the last time
    step is passed to the fully connected layer, which produces one score
    per class.

    Args:
        input_size: number of features per time step.
        hidden_size: number of features in each LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        num_classes: number of output scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.input_size = input_size
        self.num_classes = num_classes
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first = True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Classify a batch of sequences.

        Args:
            x: input of shape (batch, seq_len, input_size).

        Returns:
            Scores of shape (batch, num_classes).
        """
        batch = x.size(0)

        # Zero initial hidden and cell states. The layer count is taken from
        # the instance, not from a notebook-level global, so the model keeps
        # working when it is built with a different `num_layers` or used
        # where no such global exists.
        h0 = Variable(torch.zeros(self.num_layers, batch, self.hidden_size))
        c0 = Variable(torch.zeros(self.num_layers, batch, self.hidden_size))

        # Forward propagate; the final (h_n, c_n) states are not needed.
        out, _ = self.lstm(x, (h0, c0))

        # Decode the hidden state of the last time step.
        out = self.fc(out[:, -1, :])
        return out

In [10]:
# Build the network from the notebook's hyper-parameters
rnn = RNN(input_size=input_size,
          hidden_size=hidden_size,
          num_layers=num_layers,
          num_classes=num_classes)

# Loss and Optimizer: cross-entropy on the class scores, Adam over all model weights
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=rnn.parameters(), lr=learning_rate)

In [13]:
# Train the Model
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Reshape the batch to (batch, sequence_length, input_size) for the LSTM
        images = Variable(images.view(-1, sequence_length, input_size))
        labels = Variable(labels)

        # Forward pass and loss
        outputs = rnn(images)
        loss = criterion(outputs, labels)

        # Drop the gradients of the previous step, back-propagate, update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Only every 100th mini-batch is reported
        if (i+1) % 100 != 0:
            continue
        # NOTE: `loss.data[0]` relies on the loss being a 1-element tensor,
        # which is what this notebook's PyTorch version returns.
        print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' 
                %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0]))

Epoch [1/2], Step [100/600], Loss: 0.5338
Epoch [1/2], Step [200/600], Loss: 0.2692
Epoch [1/2], Step [300/600], Loss: 0.1629
Epoch [1/2], Step [400/600], Loss: 0.1019
Epoch [1/2], Step [500/600], Loss: 0.1880
Epoch [1/2], Step [600/600], Loss: 0.1304
Epoch [2/2], Step [100/600], Loss: 0.0794
Epoch [2/2], Step [200/600], Loss: 0.1503
Epoch [2/2], Step [300/600], Loss: 0.0494
Epoch [2/2], Step [400/600], Loss: 0.1390
Epoch [2/2], Step [500/600], Loss: 0.2526
Epoch [2/2], Step [600/600], Loss: 0.0866


In [16]:
 # Fetch a single mini-batch to experiment with in the cells below.
 # Taking the first batch from a fresh iterator avoids walking the entire
 # training set just to keep one batch in `images` / `labels`.
 images, labels = next(iter(train_loader))
 images = Variable(images.view(-1, sequence_length, input_size))
 labels = Variable(labels)

In [29]:
# Size of dimension 0 of the reshaped `images` tensor, i.e. the number of samples in the current batch.
images.size(0)

100

In [31]:
# Underlying tensor of `loss`, which is left over from the last step of the training loop.
loss.data


1.00000e-02 *
  8.6606
[torch.FloatTensor of size 1]

In [55]:
# Stand-alone, freshly initialised LSTM (separate from `rnn.lstm`) used to
# inspect what the layer returns for one batch.
lstm = nn.LSTM(input_size=input_size,
               hidden_size=hidden_size,
               num_layers=num_layers,
               batch_first=True)

# Zero initial hidden / cell states: (num_layers, batch, hidden_size)
h0 = Variable(torch.zeros(num_layers, images.size(0), hidden_size))
c0 = Variable(torch.zeros(*h0.size()))

# Forward propagate: `out` is the per-time-step output,
# `A` is the tuple of final states returned by the LSTM.
out, A = lstm(images, (h0, c0))

In [49]:
# Full output of the stand-alone `lstm` call: one hidden vector per time step for each sample.
# NOTE(review): this cell's execution count (49) is lower than that of the cell
# defining `out` (55), so the stored output may come from an earlier run.
out

Variable containing:
( 0 ,.,.) = 
 -1.1569e-02  1.5846e-02 -3.9912e-03  ...  -1.1828e-02 -2.5372e-02  1.4051e-02
 -2.0086e-02  2.3296e-02 -2.4362e-03  ...  -1.3378e-02 -3.9156e-02  1.5597e-02
 -2.5844e-02  2.6381e-02 -4.4064e-04  ...  -1.1961e-02 -4.6108e-02  1.3702e-02
                 ...                   ⋱                   ...                
 -1.0073e-02  1.1796e-02  7.1005e-03  ...  -2.6459e-02 -5.0801e-02  1.1825e-02
 -1.6360e-02  1.4496e-02  9.4884e-03  ...  -2.2689e-02 -4.9924e-02  1.1296e-02
 -2.2157e-02  1.7666e-02  9.8018e-03  ...  -1.7962e-02 -4.9998e-02  1.0284e-02

( 1 ,.,.) = 
 -1.1569e-02  1.5846e-02 -3.9912e-03  ...  -1.1828e-02 -2.5372e-02  1.4051e-02
 -2.0086e-02  2.3296e-02 -2.4362e-03  ...  -1.3378e-02 -3.9156e-02  1.5597e-02
 -2.5844e-02  2.6381e-02 -4.4064e-04  ...  -1.1961e-02 -4.6108e-02  1.3702e-02
                 ...                   ⋱                   ...                
 -2.5875e-02  2.2861e-02  9.3850e-03  ...  -9.8135e-03 -4.6350e-02  1.0997e-02
 -2.

In [70]:
# LSTM output at the last time step for every sample; the same slice `RNN.forward` passes to its linear layer.
out[:,-1,:]

Variable containing:
 6.2910e-02 -1.9163e-02 -7.8758e-02  ...   4.7302e-02 -3.2354e-02 -4.5685e-02
 6.3158e-02 -1.5859e-02 -7.5224e-02  ...   3.8398e-02 -3.1954e-02 -4.2401e-02
 6.0725e-02 -1.6988e-02 -7.9096e-02  ...   3.9929e-02 -2.8718e-02 -4.0764e-02
                ...                   ⋱                   ...                
 6.6429e-02 -1.6769e-02 -7.9435e-02  ...   3.8017e-02 -3.5446e-02 -4.0007e-02
 6.2130e-02 -1.4991e-02 -7.6022e-02  ...   3.8656e-02 -2.9067e-02 -4.3446e-02
 6.3985e-02 -1.9132e-02 -7.8415e-02  ...   4.3107e-02 -2.9712e-02 -4.6575e-02
[torch.FloatTensor of size 100x128]

In [61]:
# Second element of the state tuple returned by `lstm(...)`; per the nn.LSTM API
# this is the final cell state c_n (A[0] is the final hidden state h_n).
A[1]

Variable containing:
( 0 ,.,.) = 
 -2.6670e-02  3.1863e-02 -8.5677e-02  ...  -1.9916e-01 -2.3491e-02  2.0430e-02
 -2.8832e-02  8.9233e-03 -8.6614e-02  ...  -1.5755e-01 -3.1161e-02  9.2547e-03
 -3.6671e-02  2.5809e-02 -9.3431e-02  ...  -1.4968e-01 -3.9327e-02  7.5007e-03
                 ...                   ⋱                   ...                
 -4.9560e-02  2.4455e-02 -8.5921e-02  ...  -1.4408e-01 -4.1288e-02  1.2797e-02
 -3.9241e-02  1.9108e-02 -8.4092e-02  ...  -1.5258e-01 -3.3994e-02  1.6177e-02
 -2.6028e-02  1.7030e-02 -8.7740e-02  ...  -1.6264e-01 -2.9146e-02  1.5838e-02

( 1 ,.,.) = 
  1.2123e-01 -3.9223e-02 -1.5591e-01  ...   1.0274e-01 -6.4244e-02 -9.4596e-02
  1.2238e-01 -3.2413e-02 -1.4896e-01  ...   8.3393e-02 -6.3470e-02 -8.7849e-02
  1.1723e-01 -3.4749e-02 -1.5686e-01  ...   8.6692e-02 -5.7147e-02 -8.4367e-02
                 ...                   ⋱                   ...                
  1.2845e-01 -3.4372e-02 -1.5782e-01  ...   8.2685e-02 -7.0589e-02 -8.2744e-02
  1.

In [62]:
# Initial hidden state passed to the LSTM: zeros of shape (num_layers, batch, hidden_size).
h0

Variable containing:
( 0 ,.,.) = 
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
     ...       ⋱       ...    
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0

( 1 ,.,.) = 
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
     ...       ⋱       ...    
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
[torch.FloatTensor of size 2x100x128]

In [89]:
# Target labels of the current batch, i.e. the labels paired with `images`.
labels

Variable containing:
 9
 1
 3
 3
 3
 4
 9
 2
 4
 6
 1
 0
 4
 5
 3
 5
 6
 9
 0
 3
 8
 6
 1
 3
 2
 1
 9
 1
 2
 6
 2
 5
 2
 5
 2
 9
 9
 9
 2
 6
 1
 4
 3
 3
 4
 6
 1
 8
 0
 4
 1
 5
 7
 5
 9
 7
 1
 8
 4
 9
 8
 7
 7
 2
 0
 1
 6
 7
 7
 7
 3
 8
 2
 5
 4
 2
 4
 1
 7
 4
 9
 7
 3
 2
 3
 7
 4
 0
 1
 6
 7
 3
 8
 6
 9
 7
 0
 8
 6
 3
[torch.LongTensor of size 100]

In [75]:
# Stand-alone, freshly initialised linear layer mapping an LSTM hidden vector to class scores
fc = nn.Linear(in_features=hidden_size, out_features=num_classes)

In [90]:
# Apply the stand-alone `fc` layer to the last-time-step LSTM output: one score per class for each sample.
fc(out[:,-1,:])

Variable containing:
 0.0450  0.0012 -0.0180  ...  -0.0411 -0.0497  0.0487
 0.0441  0.0015 -0.0149  ...  -0.0385 -0.0458  0.0475
 0.0461  0.0040 -0.0157  ...  -0.0393 -0.0474  0.0502
          ...             ⋱             ...          
 0.0461  0.0023 -0.0170  ...  -0.0359 -0.0460  0.0481
 0.0451  0.0042 -0.0146  ...  -0.0394 -0.0462  0.0492
 0.0448  0.0022 -0.0158  ...  -0.0399 -0.0453  0.0486
[torch.FloatTensor of size 100x10]