## NNLM mini demo

![](./img/nnlm-model.jpg)

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

In [2]:
# 数据类型
dtype = torch.FloatTensor

In [3]:
# 训练数据预处理
sentences = [ "i like dog", "i love coffee", "i hate milk"]
word_list = ' '.join(sentences).split()
word_list = list(set(word_list))
word_dict = {w:i for i,w in enumerate(word_list)}
number_dict = {i:w for i,w in enumerate(word_list)}
n_class = len(number_dict)

In [4]:
# 函数：制作训练数据
def make_batch(sentences):
    input_batch, target_batch = [],[]
    for sen in sentences:
        words = sen.split()
        input_sen = [word_dict[w] for w in words[:-1]]
        target = word_dict[words[-1]] # 注意维数一致
        input_batch.append(input_sen)
        target_batch.append(target)
    return input_batch,target_batch

In [5]:
# NNLM参数
n_step = 2 # 用前面n_step个词来预测后面的词，可根据任务调参
n_hidden = 3 # 隐藏层单元个数，可调参数
m = 2 # 单词的特征向量维数，可调参数

![](./img/nnlm-paramters.jpg)

In [6]:
# 模型定义
class NNLM(nn.Module):
    def __init__(self):
        super(NNLM,self).__init__()
        # 定义参数大小时候先不考虑batchsize
        self.C = nn.Embedding(n_class, m)
        self.H = nn.Parameter(torch.randn(n_step*m, n_hidden).type(dtype))
        self.d = nn.Parameter(torch.randn(n_hidden).type(dtype))
        self.b = nn.Parameter(torch.randn(n_class).type(dtype))        
        self.W = nn.Parameter(torch.randn(n_step * m, n_class).type(dtype))
        self.U = nn.Parameter(torch.randn(n_hidden, n_class).type(dtype))

    def forward(self, X):
        # batch_size不确定，故x在与参数矩阵相乘时，x在前面
        
        # [batch_size, n_step, m]
        X = self.C(X) 
        
        # [batch_size, n_step, m]-->[batch_size, n_step * m] 
        # 把一个句子的前n个词的表示拉平
        X = X.view(-1, n_step * m) 
        
        # [batch_size, n_step * m] * [n_step*m, n_hidden] + d = [batch_size,n_hidden]
        tanh = torch.tanh(self.d + torch.mm(X, self.H))
        
        # b [batch_size, n_class]
        # [batch_size, n_step * m]* [n_step * m, n_class] = [batch_size, n_class]
        # [batch_size,n_hidden] * [n_hidden,n_class]
        output = self.b + torch.mm(X,self.W) + torch.mm(tanh, self.U)
        return output


In [7]:
model = NNLM()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [8]:
input_batch, target_batch = make_batch(sentences)
input_batch_tensor = Variable(torch.LongTensor(input_batch))
target_batch_tensor = Variable(torch.LongTensor(target_batch))

In [9]:
epochs = 5000
for e in range(epochs):
    optimizer.zero_grad()
    output = model(input_batch_tensor) # [batch_size, n_class]
    loss = criterion(output, target_batch_tensor) # target_batch: [batch_size]
    
    if (e + 1)%1000 == 0:
        print('Epoch:', '%04d' % (e + 1), 'cost =', '{:.6f}'.format(loss))
    
    loss.backward()
    optimizer.step()    

Epoch: 1000 cost = 0.122461
Epoch: 2000 cost = 0.015361
Epoch: 3000 cost = 0.004809
Epoch: 4000 cost = 0.002055
Epoch: 5000 cost = 0.001008


In [10]:
predict = model(input_batch_tensor).data.max(1,keepdim=True)[1]
print(predict)

tensor([[2],
        [5],
        [1]])


In [11]:
for i,input_item in enumerate(input_batch):
    print(' '.join( [number_dict[index] for index in input_item]),end=' --> ')
    print(number_dict[predict[i].item()])

i like --> dog
i love --> coffee
i hate --> milk
