In [8]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from transformers.testing_utils import require_torch

In [9]:
# nn.LSTM(input_size, hidden_size)
lstm = nn.LSTM(5, 3, num_layers=5, bidirectional=True)
# input:(seq_len, batch, input_size)
inputs = torch.randn(20, 10, 5)
# h:(num_layers * num_directions, batch, hidden_size)
h = torch.randn(10, 10, 3)
# c:(num_layers * num_directions, batch, hidden_size)
c = torch.randn(10, 10, 3)
# out:(seq_len, batch, num_directions * hidden_size)
out, (h, c) = lstm(inputs, (h, c))
print(out.size())
print(h.size())
print(c.size())

torch.Size([20, 10, 6])
torch.Size([10, 10, 3])
torch.Size([10, 10, 3])


In [10]:
def prepare_sequence(seq, to_idx):
    idx = [to_idx[w] for w in seq]
    tensor = torch.LongTensor(idx)
    return tensor


training_data = [
    ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read that book".split(), ["NN", "V", "DET", "NN"])
]
word_to_idx = {}
for sent, tags in training_data:
    for word in sent:
        if word not in word_to_idx:
            word_to_idx[word] = len(word_to_idx)
print(word_to_idx)
print(training_data)
tag_to_idx = {"DET": 0, "NN": 1, "V": 2}

{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'Everybody': 5, 'read': 6, 'that': 7, 'book': 8}
[(['The', 'dog', 'ate', 'the', 'apple'], ['DET', 'NN', 'V', 'DET', 'NN']), (['Everybody', 'read', 'that', 'book'], ['NN', 'V', 'DET', 'NN'])]


In [11]:
EMBEDDING_DIM = 6
HIDDEN_DIM = 6

In [12]:
class LSTMTagger(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, target_size):
        super(LSTMTagger, self).__init__()
        # hidden_dim:hidden_size
        # embedding_dim:input_size
        self.hidden_dim = hidden_dim
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, target_size)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        return (torch.zeros(1, 1, self.hidden_dim),
                torch.zeros(1, 1, self.hidden_dim))

    def forward(self, x):
        # x: seq_len * vocab_size
        embedding = self.word_embeddings(x)
        # embedding:seq_len * input_size
        embedding = embedding.view(len(x), 1, -1)
        lstm_out, self.hidden = self.lstm(embedding, self.hidden)
        tag_space = self.fc(lstm_out.view(len(x), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

In [13]:
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_idx), len(tag_to_idx))
loss_fn = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.1)

In [14]:
inputs = prepare_sequence(training_data[0][0], word_to_idx)
print(training_data[0][0])
tag_scores = model(inputs)
print(tag_scores)

for epoch in range(1000):
    for sentence, tags in training_data:
        model.zero_grad()
        model.hidden = model.init_hidden()
        sentence_in = prepare_sequence(sentence, word_to_idx)
        targets = prepare_sequence(tags, tag_to_idx)
        tag_scores = model(sentence_in)
        targets = targets.view(-1)
        tag_scores = tag_scores.view(-1, tag_scores.size(-1))
        loss = loss_fn(tag_scores, targets)
        loss.backward()
        optimizer.step()

inputs = prepare_sequence(training_data[0][0], word_to_idx)
tag_scores = model(inputs)
print(tag_scores)

['The', 'dog', 'ate', 'the', 'apple']
tensor([[-1.2011, -1.3253, -0.8361],
        [-1.2071, -1.2434, -0.8854],
        [-1.2073, -1.2521, -0.8792],
        [-1.2665, -1.2063, -0.8702],
        [-1.2653, -1.2015, -0.8745]], grad_fn=<LogSoftmaxBackward0>)
tensor([[-1.6212e-05, -1.2883e+01, -1.1199e+01],
        [-1.4843e+01, -4.2795e-05, -1.0067e+01],
        [-1.1543e+01, -1.1592e+01, -1.8954e-05],
        [-8.8214e-06, -1.3240e+01, -1.1857e+01],
        [-1.6707e+01, -1.1921e-07, -1.6236e+01]],
       grad_fn=<LogSoftmaxBackward0>)
