<a href="https://colab.research.google.com/github/yudhiesh/PyTorch/blob/master/NeuralLanguageModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch 

In [36]:
sentences = ["Hi my name is Yudhiesh", 'How are you doing today', 'Monday is a great day', 'I love to study NLP']

In [37]:
vocab = {} # map from word type to index
inputs = [] # stores an indexified version of each sentence

for sent in sentences:
    sent_idxes = []
    sent = sent.split() # tokenize w/ whitespace
    for w in sent:
        if w not in vocab:
            vocab[w] = len(vocab) # add a new type to the vocab
        sent_idxes.append(vocab[w])
    inputs.append(sent_idxes)

print(vocab)
print(inputs)

{'Hi': 0, 'my': 1, 'name': 2, 'is': 3, 'Yudhiesh': 4, 'How': 5, 'are': 6, 'you': 7, 'doing': 8, 'today': 9, 'Monday': 10, 'a': 11, 'great': 12, 'day': 13, 'I': 14, 'love': 15, 'to': 16, 'study': 17, 'NLP': 18}
[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 3, 11, 12, 13], [14, 15, 16, 17, 18]]


In [38]:
# two things: 1. convert to LongTensors, 2. define inputs / outputs
# Inputs are the sentences excluding the word we want to predict
prefixes = torch.LongTensor([sent[:-1] for sent in inputs])
# Labels are the words that we want to predict 
labels = torch.LongTensor([sent[-1] for sent in inputs])

In [39]:
prefixes

tensor([[ 0,  1,  2,  3],
        [ 5,  6,  7,  8],
        [10,  3, 11, 12],
        [14, 15, 16, 17]])

In [40]:
labels

tensor([ 4,  9, 13, 18])

In [52]:
class NLM(torch.nn.Module):
  def __init__(self, d_embedding, d_hidden, window_size, len_vocab):
        super(NLM, self).__init__() # init the base Module class
        self.d_emb = d_embedding
        self.embeddings = torch.nn.Embedding(len_vocab, d_embedding)
        # concatenated embeddings > hidden
        self.W_hid = torch.nn.Linear(d_embedding*window_size, d_hidden)
        # hidden > output probability distribution over vocab
        self.W_out = torch.nn.Linear(d_hidden, len_vocab)

  def forward(self, input): # each input will be a batch of prefixes (in this case 4)
      batch_size, window_size = input.size() # 4 x 4
      embs = self.embeddings(input) # 4 x 4 x 5
      print('embedding size:', embs.size())

      # next,  we want to concatenate the prefix embeddings together
      concat_embs = embs.view(batch_size, window_size * self.d_emb) # 4 x 20
      print('concatenated embs size:', concat_embs.size())

      hidden = self.W_hid(concat_embs)
      outputs = self.W_out(hidden)
      return outputs


network = NLM(d_embedding=5, d_hidden=12, window_size=4, len_vocab=len(vocab))

EPOCHS = 30
LEARNING_RATE = 0.1
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=network.parameters(), lr=LEARNING_RATE)

for i in range(EPOCHS):
  logits = network(prefixes)
  loss = loss_fn(logits, labels)
  loss.backward()
  optimizer.step()
  optimizer.zero_grad()
  print(f"Epoch: {i}\nLoss: {loss}")
  


embedding size: torch.Size([4, 4, 5])
concatenated embs size: torch.Size([4, 20])
Epoch: 0
Loss: 3.1679701805114746
embedding size: torch.Size([4, 4, 5])
concatenated embs size: torch.Size([4, 20])
Epoch: 1
Loss: 2.8735785484313965
embedding size: torch.Size([4, 4, 5])
concatenated embs size: torch.Size([4, 20])
Epoch: 2
Loss: 2.617265462875366
embedding size: torch.Size([4, 4, 5])
concatenated embs size: torch.Size([4, 20])
Epoch: 3
Loss: 2.379495859146118
embedding size: torch.Size([4, 4, 5])
concatenated embs size: torch.Size([4, 20])
Epoch: 4
Loss: 2.1488254070281982
embedding size: torch.Size([4, 4, 5])
concatenated embs size: torch.Size([4, 20])
Epoch: 5
Loss: 1.9196674823760986
embedding size: torch.Size([4, 4, 5])
concatenated embs size: torch.Size([4, 20])
Epoch: 6
Loss: 1.6906859874725342
embedding size: torch.Size([4, 4, 5])
concatenated embs size: torch.Size([4, 20])
Epoch: 7
Loss: 1.4634395837783813
embedding size: torch.Size([4, 4, 5])
concatenated embs size: torch.Size([

In [70]:
# Make predictions 

rev_vocab = {value: key for key, value in vocab.items() }
name = prefixes[0].unsqueeze(0) # Add batch dimension
logits = network(name)
probs = torch.nn.functional.softmax(logits, dim=1).squeeze(0)
predicted = probs.argmax().item()
next_word = rev_vocab[predicted]
print(f"Given the input: Hi my name is, the model predicts that the next word will be {next_word} with probability {probs[predicted]:4f}")

embedding size: torch.Size([1, 4, 5])
concatenated embs size: torch.Size([1, 20])
Given the input: Hi my name is, the model predicts that the next word will be Yudhiesh with probability 0.939876
