In [0]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

In [0]:
dtype = torch.FloatTensor

sentences = ['i like dog', 'i love coffee', 'i hate milk']

In [0]:
word_list = ' '.join(sentences).split()
word_list = list(set(word_list))
word_dict = {w: i for i, w in enumerate(word_list)}
number_dict = {i: w for i, w in enumerate(word_list)}

n_class = len(word_dict)
# number of vocabulary

In [0]:
# NNLM parameter
n_step = 2
n_hidden = 2
m = 2

In [0]:
def make_batch(sentences):
    input_batch = []
    target_batch = []
    
    for sen in sentences:
        word = sen.split()
        input = [word_dict[n] for n in word[:-1]]
        target = word_dict[word[-1]]
        
        input_batch.append(input)
        target_batch.append(target)
    return input_batch, target_batch

In [0]:
# model
class NNLM(nn.Module):
    def __init__(self):
        super(NNLM, self).__init__()
        self.C = nn.Embedding(n_class, m)
        self.H = nn.Parameter(torch.randn(n_step * m, n_hidden).type(dtype))
        self.W = nn.Parameter(torch.randn(n_step * m, n_class).type(dtype))
        self.d = nn.Parameter(torch.randn(n_hidden).type(dtype))
        self.U = nn.Parameter(torch.randn(n_hidden, n_class).type(dtype))
        self.b = nn.Parameter(torch.randn(n_class).type(dtype))
    
    def forward(self, X):
        X = self.C(X)
        X = X.view(-1, n_step * m)
        # [batch_size, n_step * n_class]
        tanh = torch.tanh(self.d + torch.mm(X, self.H))
        # [batch_size, n_hidden]
        output = self.b + torch.mm(X, self.W) + torch.mm(tanh, self.U)
        # [batch_size, n_class]
        return output

    
model = NNLM()

In [0]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

input_batch, target_batch = make_batch(sentences)
input_batch = Variable(torch.LongTensor(input_batch))
target_batch = Variable(torch.LongTensor(target_batch))

In [11]:
# training
for epoch in range(5000):
    
    optimizer.zero_grad()
    output = model(input_batch)
    
    # output: [batch_size, n_class], target_batch: [batch_size] (LongTensor, not one-hot)
    loss = criterion(output, target_batch)
    if (epoch+1)%100 == 0:
        print('Epoch: ', "%04d" % (epoch+1), 'cost = ', '{:.6f}'.format(loss))
        
    loss.backward()
    optimizer.step()

Epoch:  0100 cost =  2.450696
Epoch:  0200 cost =  1.583535
Epoch:  0300 cost =  1.025847
Epoch:  0400 cost =  0.651667
Epoch:  0500 cost =  0.428014
Epoch:  0600 cost =  0.295713
Epoch:  0700 cost =  0.213091
Epoch:  0800 cost =  0.158796
Epoch:  0900 cost =  0.121646
Epoch:  1000 cost =  0.095377
Epoch:  1100 cost =  0.076273
Epoch:  1200 cost =  0.062041
Epoch:  1300 cost =  0.051208
Epoch:  1400 cost =  0.042807
Epoch:  1500 cost =  0.036182
Epoch:  1600 cost =  0.030879
Epoch:  1700 cost =  0.026577
Epoch:  1800 cost =  0.023046
Epoch:  1900 cost =  0.020115
Epoch:  2000 cost =  0.017659
Epoch:  2100 cost =  0.015582
Epoch:  2200 cost =  0.013811
Epoch:  2300 cost =  0.012288
Epoch:  2400 cost =  0.010970
Epoch:  2500 cost =  0.009819
Epoch:  2600 cost =  0.008808
Epoch:  2700 cost =  0.007912
Epoch:  2800 cost =  0.007112
Epoch:  2900 cost =  0.006394
Epoch:  3000 cost =  0.005746
Epoch:  3100 cost =  0.005160
Epoch:  3200 cost =  0.004636
Epoch:  3300 cost =  0.004173
Epoch:  34

In [0]:
# predict
predict = model(input_batch).data.max(1, keepdim=True)[1]

In [13]:
predict

tensor([[2],
        [0],
        [6]])

In [14]:
# Test
print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])

[['i', 'like'], ['i', 'love'], ['i', 'hate']] -> ['dog', 'coffee', 'milk']
