<a href="https://colab.research.google.com/github/vedvkandge2000/Deep-Learning-Project/blob/master/RNN_in_Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import os
import torch.nn as nn
import numpy as np
from torch.nn.utils import clip_grad_norm
from torch.optim import lr_scheduler

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class Dictionary(object):
  """Two-way mapping between words and consecutive integer ids.

  Ids are handed out in first-seen order, starting at 0.
  """

  def __init__(self):
    self.word2idx = {}  # word -> id
    self.idx2word = {}  # id -> word
    self.idx = 0        # next id to hand out

  def add_word(self, word):
    """Register `word` under the next free id; known words are left untouched."""
    if word in self.word2idx:
      return
    new_id = self.idx
    self.word2idx[word] = new_id
    self.idx2word[new_id] = word
    self.idx = new_id + 1

  def __len__(self):
    """Return the number of distinct words registered so far."""
    return len(self.word2idx)

In [None]:
class TextProcess(object):
  """Tokenises a text file and packs its word ids into a batched tensor."""

  def __init__(self):
    # Vocabulary that get_data() fills while reading.
    self.dictionary = Dictionary()

  def get_data(self, path, batch_size=20):
    """Read `path` and return its word ids as a 2-D LongTensor.

    Every line is split on whitespace and terminated with an '<eos>' token.
    Each token is registered in `self.dictionary` and replaced by its id.
    The resulting id sequence is truncated to a multiple of `batch_size`
    and reshaped into `batch_size` rows of consecutive tokens.

    Args:
      path: text file to read.
      batch_size: number of rows in the returned tensor; must be >= 1.

    Returns:
      A torch.LongTensor of shape (batch_size, total_tokens // batch_size).
      A file with fewer than `batch_size` tokens yields shape (batch_size, 0).

    Raises:
      ValueError: if `batch_size` is smaller than 1.
    """
    if batch_size < 1:
      raise ValueError(
          'batch_size must be a positive integer, got {}'.format(batch_size))

    # Single pass over the file: register each token and record its id
    # right away, so the file is neither re-opened nor re-tokenised.
    word_ids = []
    with open(path, 'r') as f:
      for line in f:
        for word in line.split() + ['<eos>']:
          self.dictionary.add_word(word)
          word_ids.append(self.dictionary.word2idx[word])

    # 1-D tensor holding the id of every token in file order.
    rep_tensor = torch.tensor(word_ids, dtype=torch.long)
    # Tokens per row; the remainder that does not fill a row is dropped.
    tokens_per_row = rep_tensor.shape[0] // batch_size
    rep_tensor = rep_tensor[:tokens_per_row * batch_size]
    # Explicit column count (not -1): -1 is ambiguous for an empty tensor.
    return rep_tensor.view(batch_size, tokens_per_row)

In [None]:
# --- Model architecture ---
embed_size = 128       # width of each word embedding (input features of the LSTM)
hidden_size = 1024     # number of LSTM hidden units
num_layers = 1         # stacked LSTM layers

# --- Training schedule ---
batch_size = 20        # rows of the data tensor processed in parallel
timesteps = 30         # tokens per training window
num_epochs = 100
learning_rate = 0.002

In [None]:
# Text loader; its dictionary starts empty and is filled by get_data().
corpus = TextProcess()

In [None]:
# Index every word of the corpus file and lay the ids out as batch_size rows.
rep_tensor = corpus.get_data('/content/alice.txt',batch_size)

In [None]:
# rep_tensor holds the index of every word, arranged as (batch_size, words_per_row).
# For this corpus each of the 20 rows contains 1484 word ids (see the output below).
print(rep_tensor.shape)

torch.Size([20, 1484])


In [None]:
# Number of distinct tokens (including '<eos>'); sizes the embedding and output layers.
vocab_size = len(corpus.dictionary)
print(vocab_size)

5290


In [None]:
# Number of full `timesteps`-long windows that fit in one row of rep_tensor.
num_batches = rep_tensor.shape[1] // timesteps
print(num_batches)

49


In [None]:
class TextGenerator(nn.Module):
  """Word-level language model: embedding -> LSTM -> linear decoder over the vocabulary."""

  def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
    super().__init__()
    # Word ids -> dense feature vectors.
    self.embed = nn.Embedding(vocab_size, embed_size)
    # batch_first=True: tensors are laid out as (samples, timesteps, features).
    self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
    # LSTM output -> one score per vocabulary entry.
    self.linear = nn.Linear(hidden_size, vocab_size)

  def forward(self,x,h):
    """Score the next word for every position of `x`.

    Args:
      x: tensor of word ids fed to the embedding layer.
      h: (hidden, cell) state pair handed to the LSTM.

    Returns:
      A pair (scores, (hidden, cell)). `scores` has one row per
      (sample, timestep) position and vocab_size columns; the second
      element is the state pair returned by the LSTM.
    """
    embedded = self.embed(x)
    lstm_out, (hidden, cell) = self.lstm(embedded, h)
    # Collapse the sample and timestep axes so the linear layer decodes
    # every position in one call.
    rows = lstm_out.size(0) * lstm_out.size(1)
    flattened = lstm_out.reshape(rows, lstm_out.size(2))
    scores = self.linear(flattened)
    return scores, (hidden, cell)

In [None]:
# Build the language model and move its parameters to the selected device.
model = TextGenerator(vocab_size, embed_size, hidden_size, num_layers).to(device)

In [None]:
# Cross-entropy between the per-position vocabulary scores and the target word ids.
loss_fn = nn.CrossEntropyLoss()
# Adam over all model parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Configured to multiply the learning rate by 0.1 every 20 scheduler steps.
# NOTE(review): in the code shown here the only call to exp_lr_schedular.step()
# is commented out, so the learning rate appears to stay constant - confirm intent.
exp_lr_schedular = lr_scheduler.StepLR(optimizer, step_size = 20, gamma = 0.1)

In [None]:
for epoch in range(num_epochs):
    # Start every epoch from zeroed hidden and cell states.
    states = (torch.zeros(num_layers, batch_size, hidden_size).to(device),
              torch.zeros(num_layers, batch_size, hidden_size).to(device))

    for i in range(0, rep_tensor.size(1) - timesteps, timesteps):
        # Inputs are a window of `timesteps` tokens per row; targets are the
        # same window shifted one token ahead (next-word prediction).
        inputs = rep_tensor[:, i:i+timesteps].to(device)
        targets = rep_tensor[:, (i+1):(i+1)+timesteps].to(device)

        # Consecutive windows are contiguous text, so carry the LSTM state
        # from one window to the next. Detaching cuts the autograd graph at
        # the window boundary (truncated backpropagation through time).
        states = tuple(state.detach() for state in states)
        outputs, states = model(inputs, states)
        loss = loss_fn(outputs, targets.reshape(-1))

        model.zero_grad()
        loss.backward()
        # Gradient clipping by norm: rescale all gradients together so their
        # total norm does not exceed 0.5. Guards against exploding gradients.
        nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        step = (i+1) // timesteps
        # NOTE: exp_lr_schedular is not stepped anywhere in this loop, so the
        # learning rate stays at its initial value.
        if step % 100 == 0:
            print ('Epoch [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, loss.item()))



Epoch [1/100], Loss: 8.5732
Epoch [2/100], Loss: 5.9941
Epoch [3/100], Loss: 5.2025
Epoch [4/100], Loss: 4.6692
Epoch [5/100], Loss: 4.1725
Epoch [6/100], Loss: 3.7642
Epoch [7/100], Loss: 3.3880
Epoch [8/100], Loss: 2.8792
Epoch [9/100], Loss: 2.5074
Epoch [10/100], Loss: 2.0714
Epoch [11/100], Loss: 1.7428
Epoch [12/100], Loss: 1.5906
Epoch [13/100], Loss: 1.2320
Epoch [14/100], Loss: 0.9333
Epoch [15/100], Loss: 0.7256
Epoch [16/100], Loss: 0.4473
Epoch [17/100], Loss: 0.3444
Epoch [18/100], Loss: 0.1986
Epoch [19/100], Loss: 0.1341
Epoch [20/100], Loss: 0.0927
Epoch [21/100], Loss: 0.0777
Epoch [22/100], Loss: 0.0732
Epoch [23/100], Loss: 0.0713
Epoch [24/100], Loss: 0.0692
Epoch [25/100], Loss: 0.0678
Epoch [26/100], Loss: 0.0667
Epoch [27/100], Loss: 0.0657
Epoch [28/100], Loss: 0.0650
Epoch [29/100], Loss: 0.0643
Epoch [30/100], Loss: 0.0638
Epoch [31/100], Loss: 0.0632
Epoch [32/100], Loss: 0.0629
Epoch [33/100], Loss: 0.0624
Epoch [34/100], Loss: 0.0622
Epoch [35/100], Loss: 0

In [None]:
# Test the model: sample words one at a time and write them to a text file.
with torch.no_grad():
    with open('/content/results.txt', 'w') as f:
        # Zeroed hidden and cell states for a single sequence (batch of 1).
        state = (torch.zeros(num_layers, 1, hidden_size).to(device),
                 torch.zeros(num_layers, 1, hidden_size).to(device))
        # Select one word id randomly and convert it to shape (1,1).
        # Named word_input so the builtin input() is not shadowed.
        word_input = torch.randint(0,vocab_size, (1,)).long().unsqueeze(1).to(device)
        num_words = 500

        for i in range(num_words):
            # Feed the returned state back in so each prediction is conditioned
            # on everything generated so far, not only on the previous word.
            output, state = model(word_input, state)
            # Turn the scores into a probability distribution. softmax is the
            # normalised exponential, but does not overflow for large scores.
            prob = torch.softmax(output, dim=1)
            word_id = torch.multinomial(prob, num_samples=1).item()
            # Replace the input with the sampled word id for the next time step.
            word_input.fill_(word_id)

            # Write the result to file; '<eos>' becomes a line break.
            word = corpus.dictionary.idx2word[word_id]
            word = '\n' if word == '<eos>' else word + ' '
            f.write(word)

            if (i+1) % 100 == 0:
                print('Sampled [{}/{}] words and save to {}'.format(i+1, num_words, 'results.txt'))

torch.Size([1, 5290])
5
torch.Size([1, 5290])
2974
torch.Size([1, 5290])
73
torch.Size([1, 5290])
20
torch.Size([1, 5290])
5
torch.Size([1, 5290])
5
torch.Size([1, 5290])
1284
torch.Size([1, 5290])
5
torch.Size([1, 5290])
3289
torch.Size([1, 5290])
5
torch.Size([1, 5290])
160
torch.Size([1, 5290])
6
torch.Size([1, 5290])
333
torch.Size([1, 5290])
5
torch.Size([1, 5290])
5
torch.Size([1, 5290])
34
torch.Size([1, 5290])
933
torch.Size([1, 5290])
366
torch.Size([1, 5290])
367
torch.Size([1, 5290])
3429
torch.Size([1, 5290])
5
torch.Size([1, 5290])
5
torch.Size([1, 5290])
3204
torch.Size([1, 5290])
20
torch.Size([1, 5290])
427
torch.Size([1, 5290])
11
torch.Size([1, 5290])
1667
torch.Size([1, 5290])
1697
torch.Size([1, 5290])
7
torch.Size([1, 5290])
44
torch.Size([1, 5290])
11
torch.Size([1, 5290])
1667
torch.Size([1, 5290])
250
torch.Size([1, 5290])
251
torch.Size([1, 5290])
34
torch.Size([1, 5290])
5
torch.Size([1, 5290])
265
torch.Size([1, 5290])
1909
torch.Size([1, 5290])
34
torch.Size