In [0]:
import torch 
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
from torch.nn import Embedding
import torch.optim as optim 

#1-Creating and preparing dataset:

In [126]:
# First we create the training data using some sentences and their word-tags

training_data = [
    ("The cat ate the cheese".lower().split(), ["DET", "NN", "V", "DET", "NN"]),
    ("She read that book".lower().split(), ["NN", "V", "DET", "NN"]),
    ("The dog loves art".lower().split(), ["DET", "NN", "V", "NN"]),
    ("The elephant answers the phone".lower().split(), ["DET", "NN", "V", "DET", "NN"]),
    ("He looked at that animal".lower().split(),["NN","V","DET","DET","NN"])
]

training_data[0]

(['the', 'cat', 'ate', 'the', 'cheese'], ['DET', 'NN', 'V', 'DET', 'NN'])

In [131]:
#Now we create a dictionary of code for each tag 

tag2idx = {"DET": 0, "NN": 1, "V": 2}

#Now we have to give for each word a unique index

word2idx={}

for words,tags in training_data:

  for word in words:

    if word not in word2idx:

      word2idx[word]=len(word2idx)
    
print("dictionary of words:"+str(word2idx))     

dictionary of words:{'the': 0, 'cat': 1, 'ate': 2, 'cheese': 3, 'she': 4, 'read': 5, 'that': 6, 'book': 7, 'dog': 8, 'loves': 9, 'art': 10, 'elephant': 11, 'answers': 12, 'phone': 13, 'he': 14, 'looked': 15, 'at': 16, 'animal': 17}


In [0]:
# We must prepare the data in order to output a vector 

def prepare(sentense,idx=word2idx):

  """
  This function takes a sentence/tags and returns a pytorch tensor of corresponding indexes to each word
  """
  output=np.array([idx[word] for word in sentense])
  output=torch.from_numpy(output)
  return output


#2-Understanding the code of LSTM:










In [0]:
lstm=nn.LSTM(input_size=4,hidden_size=6,)
"""
the input is a 3D tensor of shape (seq_len, batch, input_size)
input_size :Number of features EX:in time series temperature,pressor,and weather informations in a specific time
hidden_size:Number of hidden states (Neurones)
By default the first state is set to 0 we can change it
output, (h_n, c_n)
    - **output** of shape `(seq_len, batch, num_directions * hidden_size)`: tensor  
      containing the output features `(h_t)` from the last layer of the LSTM
    -(h_n,c_n) is the short and long term memory of the LSTM

"""


# 3-Create Model for Word-Tagger:*texte en italique*

In [0]:
class WordTagger(nn.Module):

  def __init__(self,embedding_size,hidden_size,vocabulary_size,target_size):
    
    super(WordTagger,self).__init__()

    self.hidden_dim=hidden_size

    self.embedding=Embedding(vocabulary_size,embedding_size)
    
    self.lstm=nn.LSTM(input_size=embedding_size,hidden_size=hidden_size)

    self.linear=nn.Linear(hidden_size,target_size)

    self.hidden=self.init_hidden()

  def init_hidden(self):

    """
    The method initialize the hidden state of LSTM cells 
    Hidden state consists of 2 tensors of shape (num_layers * num_directions, batch, hidden_size)
    """
    h0=torch.zeros(1,1,self.hidden_dim)
    c0=torch.zeros(1,1,self.hidden_dim)
    return (h0,c0)

  def forward(self,sentence):

    embed=self.embedding(sentence)
    embed=embed.view(len(sentence),1,-1) #Add the batch size to the size of the input
    lstm_out,self.hidden=self.lstm(embed,self.hidden)
    outputs=self.linear(lstm_out.view(len(sentence),-1))
    tags_scores=F.log_softmax(outputs,dim=1) #the softmax will be computed accros 1 which means for each word (not sequence)
    
    return tags_scores


#4-Define the Model:

In [0]:
embedding_size=6 #Encode each word in 6 dimensional vector
hidden_size=5
target_size=len(tag2idx) #we have 3 tags
vocabulary_size=len(word2idx)

Model=WordTagger(embedding_size,hidden_size,vocabulary_size,target_size)

loss_function=nn.NLLLoss() #Since our LSTM outputs a series of tag scores with a softmax layer, we will use NLLLoss
optimizer=optim.SGD(Model.parameters(),lr=0.1)

#5-Train the Model:


In [141]:
epochs=300

for e in range(epochs):

  epoch_loss=0.0

  for sentence,tags in training_data:

    Model.hidden = Model.init_hidden() #This will detach the model from its history each sentence is independent

    sentence=prepare(sentence,word2idx)
    tags=prepare(tags,tag2idx)

    
    
    predicted_tags=Model(sentence)
    

    #assert predicted_tags.size()==tags.size(),"Error in the size of tags"
    
    
    Model.zero_grad()

    loss=loss_function(predicted_tags,tags)

    epoch_loss+=loss.item()

    loss.backward()

    optimizer.step()

  if e%20==19:

    print("epoch:{} ,loss = {} ".format(e+1,epoch_loss/len(training_data))) #We print the mean loss for the words


epoch:20 ,loss = 1.0291601777076722 
epoch:40 ,loss = 0.9433849334716797 
epoch:60 ,loss = 0.7052922964096069 
epoch:80 ,loss = 0.3633378207683563 
epoch:100 ,loss = 0.1720342755317688 
epoch:120 ,loss = 0.09648850858211518 
epoch:140 ,loss = 0.06288489252328873 
epoch:160 ,loss = 0.04526218958199024 
epoch:180 ,loss = 0.03478707931935787 
epoch:200 ,loss = 0.027976383827626704 
epoch:220 ,loss = 0.02324918545782566 
epoch:240 ,loss = 0.0198029525578022 
epoch:260 ,loss = 0.017193349823355675 
epoch:280 ,loss = 0.015156717039644718 
epoch:300 ,loss = 0.013527943752706051 


#6-Testing the Model:

In [152]:
test_sentence = "she loves cheese".lower().split()

# see what the scores are after training
inputs = prepare(test_sentence, word2idx)
tag_scores = Model(inputs)
print(tag_scores)

# print the most likely tag index, by grabbing the index with the maximum score!
# recall that these numbers correspond to tag2idx = {"DET": 0, "NN": 1, "V": 2}
_, predicted_tags = torch.max(tag_scores, 1)
print('\n')
print('Predicted tags: \n',predicted_tags)
test={0:"DET",1:"NN",2:"V"}
for id,word in enumerate(test_sentence):
  
  print('Word:{} ,Predicted Tag : {} ==> {} '.format(word,predicted_tags[id],test[predicted_tags[id].item()]))

tensor([[-5.8063, -0.0071, -5.4951],
        [-4.3214, -5.2606, -0.0186],
        [-1.5231, -0.5951, -1.4678]], grad_fn=<LogSoftmaxBackward>)


Predicted tags: 
 tensor([1, 2, 1])
Word:she ,Predicted Tag : 1 ==> NN 
Word:loves ,Predicted Tag : 2 ==> V 
Word:cheese ,Predicted Tag : 1 ==> NN 
