In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import numpy as np

# Tutorial source:
https://www.youtube.com/watch?v=0_PgWWmauHk

Load MNIST images from disc instead of using pytorch Dataloader.

In [2]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters 
# input_size = 784 # 28x28
num_classes = 2
num_epochs = 2
batch_size = 100
learning_rate = 0.001

input_size = 28
sequence_length = 28
hidden_size = 128
num_layers = 2

# Data parameters
dpath = '../../../local_data/fe_exp/mnist-org/'

In [3]:
def get_datapoint(batch_size, sequence_length, input_size, start_ind, dpath):
    images = np.zeros((batch_size, sequence_length, input_size))
    labels = np.zeros(batch_size)
    
    # load labels
    lbls = np.loadtxt(dpath+'lbls.txt')
    
    for ind in range(batch_size):
        # load an image
        fv = np.loadtxt(dpath+'fv_'+str(start_ind + ind)+'.txt')
        fv = fv.reshape(sequence_length, input_size)

        # add it to the batch
        images[ind, :, :] = fv

    # batch equivalent labels 
    labels = lbls[start_ind:start_ind+batch_size]
    
    # convert numpy arrays to torch tensors
    images = torch.from_numpy(images).to(torch.float32)
    labels = torch.from_numpy(labels).to(torch.int64)
    
    return images, labels

In [4]:
# Fully connected neural network with one hidden layer
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        # -> x needs to be: (batch_size, seq, input_size)
        
        # or:
        #self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        #self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x):
        # Set initial hidden states (and cell states for LSTM)
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) 
        #c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) 
        
        # x: (n, 28, 28), h0: (2, n, 128)
        
        # Forward propagate RNN
        out, _ = self.rnn(x, h0)  
        # or:
        #out, _ = self.lstm(x, (h0,c0))  
        
        # out: tensor of shape (batch_size, seq_length, hidden_size)
        # out: (n, 28, 128)
        
        # Decode the hidden state of the last time step
        out = out[:, -1, :]
        # out: (n, 128)
         
        out = self.fc(out)
        # out: (n, 10)
        return out


In [5]:
model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  

In [6]:
# Train the model
n_total_steps = 100 # because there are 14780 images in total in the set
for epoch in range(num_epochs):
    for i in range(n_total_steps):  
        # origin shape: [N, 1, 28, 28]
        # resized: [N, 28, 28]
        images, labels = get_datapoint(batch_size, sequence_length, input_size, i*batch_size, dpath)
        
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 10 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

Epoch [1/2], Step [10/100], Loss: 0.1397
Epoch [1/2], Step [20/100], Loss: 0.0038
Epoch [1/2], Step [30/100], Loss: 0.0036
Epoch [1/2], Step [40/100], Loss: 0.0046
Epoch [1/2], Step [50/100], Loss: 0.0413
Epoch [1/2], Step [60/100], Loss: 0.0670
Epoch [1/2], Step [70/100], Loss: 0.0311
Epoch [1/2], Step [80/100], Loss: 0.0832
Epoch [1/2], Step [90/100], Loss: 0.0051
Epoch [1/2], Step [100/100], Loss: 0.0038
Epoch [2/2], Step [10/100], Loss: 0.0039
Epoch [2/2], Step [20/100], Loss: 0.0019
Epoch [2/2], Step [30/100], Loss: 0.0051
Epoch [2/2], Step [40/100], Loss: 0.0063
Epoch [2/2], Step [50/100], Loss: 0.0014
Epoch [2/2], Step [60/100], Loss: 0.0378
Epoch [2/2], Step [70/100], Loss: 0.0040
Epoch [2/2], Step [80/100], Loss: 0.0068
Epoch [2/2], Step [90/100], Loss: 0.0012
Epoch [2/2], Step [100/100], Loss: 0.0009


In [7]:
# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for i in range(101, 140): # take care not to repeat training data
        images, labels = get_datapoint(batch_size, sequence_length, input_size, i*batch_size, dpath)
        images = images.to(device)
        labels = labels.to(device)
        
        outputs = model(images)
        # max returns (value ,index)
        _, predicted = torch.max(outputs.data, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network on the 10000 test images: {acc} %')

Accuracy of the network on the 10000 test images: 99.92307692307692 %
