In [30]:
import torch 
import torch.nn as nn 
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

In [7]:
# Toy part-of-speech corpus: every entry pairs a lower-cased, whitespace-split
# sentence with a tag list of the same length.
training_data = [
    (
        "The cat ate the cheese".lower().split(),
        ["DET", "NN", "V", "DET", "NN"],
    ),
    (
        "She read that book".lower().split(),
        ["NN", "V", "DET", "NN"],
    ),
    (
        "The dog loves art".lower().split(),
        ["DET", "NN", "V", "NN"],
    ),
    (
        "The elephant answers the phone".lower().split(),
        ["DET", "NN", "V", "DET", "NN"],
    ),
]

# Vocabulary: each distinct word gets the next free integer id, in order of
# first appearance across the corpus.
wordidx = {}
for sentence, tags in training_data:
    for word in sentence:
        # setdefault only inserts when the word is new, so known ids are kept.
        wordidx.setdefault(word, len(wordidx))

# Fixed tag -> index mapping.
tagidx = dict(DET=0, NN=1, V=2)
print(wordidx)

{'the': 0, 'cat': 1, 'ate': 2, 'cheese': 3, 'she': 4, 'read': 5, 'that': 6, 'book': 7, 'dog': 8, 'loves': 9, 'art': 10, 'elephant': 11, 'answers': 12, 'phone': 13}


In [12]:
def prepare_sequence(seq, to_idx):
    """Translate a sequence of symbols into a 1-D int64 tensor of indices.

    Args:
        seq: iterable of keys (e.g. words or tags) to look up.
        to_idx: mapping from each key to its integer index.

    Returns:
        A ``torch.long`` tensor holding ``to_idx[item]`` for every item of
        ``seq``, in order.

    Raises:
        KeyError: if an item of ``seq`` is missing from ``to_idx``.
    """
    return torch.tensor([to_idx[item] for item in seq], dtype=torch.long)

In [14]:
# Sanity check: encode a sentence built only from words already in the
# vocabulary and show the resulting index tensor.
test_tokens = "the dog ate the book".lower().split()
test = prepare_sequence(test_tokens, wordidx)
print(test)

tensor([0, 8, 2, 0, 7])


In [32]:
class Tagger(nn.Module):
    """LSTM sequence tagger.

    Word indices are embedded, passed through an ``nn.LSTM`` (batch size 1),
    and each LSTM output is projected to log-probabilities over the tag set.

    Args:
        embedding_dim: size of each word embedding vector.
        hidden_dim: size of the LSTM hidden state.
        vocab_size: number of entries in the embedding table.
        tagset_size: number of distinct tags to score.
    """

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.word_embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
        # Kept as an attribute for callers that inspect or reset it; after a
        # forward pass it holds the LSTM's final (h_n, c_n).
        self.hidden = self.init_hidden()

    def init_hidden(self):
        """Return a fresh all-zero ``(h_0, c_0)`` pair, each of shape (1, 1, hidden_dim).

        The tensors are allocated with the same device and dtype as the
        model's parameters, so the state stays usable after ``.to(device)``
        or ``.double()``.
        """
        reference = self.hidden2tag.weight
        h0 = reference.new_zeros(1, 1, self.hidden_dim)
        c0 = reference.new_zeros(1, 1, self.hidden_dim)
        return h0, c0

    def forward(self, sentence):
        """Score every token of one sentence.

        Args:
            sentence: 1-D integer tensor of word indices.

        Returns:
            Tensor of shape ``(len(sentence), tagset_size)`` holding
            log-probabilities (log-softmax over the tag dimension).
        """
        seq_len = len(sentence)
        embs = self.word_embedding(sentence)
        # Start every sentence from a zero state. Reusing the previous call's
        # state would leak context between unrelated sentences and make a
        # second backward() walk into the already-freed graph of the first.
        self.hidden = self.init_hidden()
        # nn.LSTM expects (seq_len, batch, features); batch is fixed to 1.
        lstm_out, self.hidden = self.lstm(embs.view(seq_len, 1, -1), self.hidden)
        output = self.hidden2tag(lstm_out.view(seq_len, -1))
        score = F.log_softmax(output, dim=1)
        return score
        

In [33]:
# Seed PyTorch's RNG before the model is built so the randomly initialised
# weights are identical on every Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# Hyperparameters.
embedding_dim = 6
hidden_dim = 6
learning_rate = 0.01

model = Tagger(
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    vocab_size=len(wordidx),
    tagset_size=len(tagidx),
)
# NLLLoss takes log-probabilities, which is what Tagger.forward returns
# (it ends in log_softmax).
loss_func = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)