# RECURRENT NEURAL NETWORKS

## Problem Statement

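Classify the handwritten digits (0 to 9) of the MNIST dataset with a recurrent neural network (LSTM). Each 28x28 image is treated as a sequence of 28 rows, and each row of 28 pixels is fed to the network as one time step.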

figure here

### 1) Imports

In [1]:
from __future__ import print_function           # Use print() instead of print
from __future__ import division                 # 1 / 2 = 0.5, 1 // 2 = 0
import torch                                    # import PyTorch module 
import torch.nn as nn                           # neural network module
from torch.utils.data import DataLoader         # dataloader abstracts sampling of minibatch
import torchvision.datasets as dset             # standard machine learning datasets
import torchvision.transforms as tform          # transforms for data augmentation
from torch.autograd import Variable             # auto differentiation
import sd_utils as utils                        # some python helper functions ( see: sd_utils.py ) 

### 2) Hyper Parameters and Initializations

In [2]:
torch.manual_seed(seed=1234)                   # fix the random number seed so repeated runs give the same results

<torch._C.Generator at 0x7fadc3c30d68>

#### a) Dataset related

In [3]:
seq_len = 28                                    # number of steps the RNN is unrolled for:
                                                # a 28x28 image is read as a sequence of 28 rows
num_classes = 10                                # one class per MNIST digit, 0 to 9

#### b) Learning

In [4]:
learning_rate = 0.01                            # step size handed to the optimizer
num_epochs = 2                                  # number of passes over the training set
batch_size = 100                                # number of images in each minibatch

#### c) Network

In [5]:
num_layers = 2                                 # how many recurrent layers are stacked
hidden_size = 128                              # number of neurons in the hidden layer
input_size = 28                                # features per time step: the 28 pixels of one image row

### 3) Download and Preprocess Dataset

In [6]:
# Create the data directories if they do not exist yet
utils.mkdir_p('../data')
utils.mkdir_p('../data/lab5')

# Training split: stored under ../data/lab5, downloaded if required,
# with ToTensor as the only transform applied to the images
dset_train = dset.MNIST('../data/lab5', train=True, download=True, transform=tform.ToTensor())

# Test split: same location, download setting and transform as the training split
dset_test = dset.MNIST('../data/lab5', train=False, download=True, transform=tform.ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


### 4) Create [DataLoader](http://pytorch.org/docs/_modules/torch/utils/data/dataloader.html) objects ("[python generator](http://naiquevin.github.io/python-generators-and-being-lazy.html)" which samples a minibatch from the dataset)

In [7]:
# Training minibatches: shuffling matters while training
dloader_train = DataLoader(dset_train, batch_size=batch_size, shuffle=True)

# Test minibatches: no shuffling, to maintain a consistent order for the test set
dloader_test = DataLoader(dset_test, batch_size=batch_size, shuffle=False)

### 5) Define neural network model : RNN

In [8]:
class RNN(nn.Module):
    """LSTM network followed by a linear layer that scores each class.

    The layer sizes are read from the notebook-level hyper-parameters
    ``input_size``, ``hidden_size``, ``num_layers`` and ``num_classes``.
    """
    def __init__(self):
        super(RNN, self).__init__()                    # python inheritance: call base class constructor
        self.hidden_size = hidden_size                 # number of features in the hidden state h
        self.num_layers = num_layers                   # number of recurrent layers
        self.lstm = nn.LSTM(input_size=input_size,     # number of expected features in the input x
                            hidden_size=hidden_size,   # number of features in the hidden state h
                            num_layers=num_layers,     # number of recurrent layers
                            batch_first=True           # order: (batch, seq, feature)
                           )
        self.fc = nn.Linear(hidden_size, num_classes)  # maps a hidden state to one score per class

    def forward(self, x):
        """Compute class scores for a batch of sequences.

        Args:
            x: input tensor ordered as (batch, seq, feature).

        Returns:
            Tensor of shape (batch, num_classes) computed from the output
            of the last time step.
        """
        # Each image is a separate sequence, so the hidden and cell states
        # are reinitialized to zero on every call. new_zeros() creates them
        # with the same dtype and on the same device as the input.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)

        # Forward propagate RNN (the final hidden/cell states are not needed)
        out, _ = self.lstm(x, (h0, c0))

        # Decode hidden state of last time step
        out = self.fc(out[:, -1, :])
        return out
    
rnn = RNN()                                            # build the network from the hyper-parameters defined above

### 6) Define Loss function and Optimizer

In [9]:
# Adam optimizer over all parameters of the network, using the configured learning rate
optimizer = torch.optim.Adam(params=rnn.parameters(), lr=learning_rate)
# Cross-entropy loss, applied to the network outputs and the labels during training
criterion = nn.CrossEntropyLoss()

### 7) Training Loop

In [10]:
# Train the Model
rnn.train()                                            # explicit training mode, so the cell can be re-run safely
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(dloader_train):
        # Reshape every image into a sequence of seq_len rows with input_size values per row
        images = images.view(-1, seq_len, input_size)

        # Forward + Backward + Optimize
        optimizer.zero_grad()                          # clear the gradients of the previous step
        outputs = rnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            # loss.item() returns the loss as a Python float; indexing the 0-dim loss
            # tensor (loss.data[0]) raises an error on current PyTorch releases.
            # len(dloader_train) is the real number of steps per epoch, including a
            # partial last batch.
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                  % (epoch+1, num_epochs, i+1, len(dloader_train), loss.item()))

Epoch [1/2], Step [100/600], Loss: 0.8047
Epoch [1/2], Step [200/600], Loss: 0.2387
Epoch [1/2], Step [300/600], Loss: 0.1089
Epoch [1/2], Step [400/600], Loss: 0.1174
Epoch [1/2], Step [500/600], Loss: 0.1375
Epoch [1/2], Step [600/600], Loss: 0.3086
Epoch [2/2], Step [100/600], Loss: 0.0915
Epoch [2/2], Step [200/600], Loss: 0.2028
Epoch [2/2], Step [300/600], Loss: 0.1224
Epoch [2/2], Step [400/600], Loss: 0.1017
Epoch [2/2], Step [500/600], Loss: 0.1004
Epoch [2/2], Step [600/600], Loss: 0.0746


### 8) Testing Loop

In [11]:
rnn.eval()                                             # evaluation mode for testing
correct = 0                                            # number of correctly classified test images
total = 0                                              # number of test images seen
with torch.no_grad():                                  # gradients are not needed for evaluation
    for images, labels in dloader_test:
        # Reshape every image into a sequence of seq_len rows with input_size values per row
        images = images.view(-1, seq_len, input_size)
        outputs = rnn(images)
        _, predicted = torch.max(outputs, 1)           # index of the highest score is the predicted class
        total += labels.size(0)
        # .item() converts the count to a Python int, so `correct` stays a plain
        # number instead of turning into a tensor
        correct += (predicted == labels).sum().item()

print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total))

Test Accuracy of the model on the 10000 test images: 96 %


### 9) Save model 

In [12]:
torch.save(obj=rnn.state_dict(), f='rnn.pkl')          # store the model's state dict in the file rnn.pkl

## References
The following references were used in preparing this tutorial:
1. Yunjey Choi, [PyTorch Tutorials](https://github.com/yunjey/pytorch-tutorial)
2. PyTorch, [PyTorch Tutorials](http://pytorch.org/tutorials/)
3. Andrej Karpathy, [Unreasonable Effectiveness of RNNs](http://karpathy.github.io/2015/05/21/rnn-effectiveness/)
4. Christopher Olah, [Understanding LSTMs](http://colah.github.io/posts/2015-08-Understanding-LSTMs/)
5. DeepLearning4j, [Beginner's guide to learning LSTMs](https://deeplearning4j.org/lstm.html)