In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim


In [None]:
# Unidirectional, single-layer RNN
class TextRNN(nn.Module):
    """Single-layer RNN that scores every vocabulary entry from a word sequence.

    The recurrent layer's output at the final time step is projected onto the
    vocabulary by a bias-free linear layer, after which a separately
    registered bias vector is added.

    Args:
        input_size: Feature size of each input step (handed to ``nn.RNN``).
        hidden_size: Size of the RNN hidden state.
        worddict_len: Number of output scores, one per vocabulary entry.
    """

    def __init__(self, input_size, hidden_size, worddict_len):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.worddict_len = worddict_len

        # Recurrent layer, created with nn.RNN's default settings.
        self.rnn = nn.RNN(input_size, hidden_size)
        # Fully connected projection; its built-in bias is disabled because
        # ``self.bias`` below is added explicitly in ``forward``.
        self.out = nn.Linear(hidden_size, worddict_len, bias=False)
        # Output bias, initialised to zeros.
        self.bias = nn.Parameter(torch.zeros(worddict_len))

    def forward(self, x, hidden):
        """Return the scores computed from the last time step.

        Args:
            x: Input tensor; its first two dimensions are swapped before it is
                fed to the RNN so that the batch dimension comes second.
            hidden: Initial hidden state passed straight to ``nn.RNN``.

        Returns:
            ``self.out`` applied to the RNN output of the final time step,
            plus ``self.bias``.
        """
        # Move the batch dimension to position 1 (sequence-first layout).
        seq_first = torch.transpose(x, 0, 1)

        step_outputs, _ = self.rnn(seq_first, hidden)

        # Only the final time step is used as the real output.
        last_step = step_outputs[-1]

        return self.out(last_step) + self.bias


In [None]:
def make_batch(sentences,word_dict,worddict_len,hidden_size):
    """Turn whitespace-separated sentences into one-hot inputs and target ids.

    For every sentence, all words except the last become the input sequence
    (one-hot encoded) and the last word becomes the prediction target.

    Args:
        sentences: Iterable of strings; each is split on whitespace. All
            sentences must contain the same number of words so the inputs
            can be stacked into a single array.
        word_dict: Mapping from word to integer index.
        worddict_len: Width of each one-hot vector.
        hidden_size: Hidden size used for the initial hidden state.

    Returns:
        Tuple ``(input_batch, target_batch, batch_size, hidden)``:
        a float32 tensor of shape ``(batch_size, words_per_sentence - 1,
        worddict_len)``, an int64 tensor of shape ``(batch_size,)``, the
        number of sentences, and a zero tensor of shape
        ``(1, batch_size, hidden_size)``.

    Raises:
        ValueError: If ``sentences`` is empty.
        KeyError: If a word is missing from ``word_dict``.
    """
    one_hot_rows = np.eye(worddict_len)
    input_batch = []
    target_batch = []
    for sentence in sentences:
        words = sentence.split()
        # Look up each word's index; every word but the last is an input.
        input_ids = [word_dict[w] for w in words[:-1]]
        input_batch.append(one_hot_rows[input_ids])
        target_batch.append(word_dict[words[-1]])

    if not input_batch:
        raise ValueError("sentences must contain at least one sentence")

    # Convert only after the loop: converting inside it replaced the lists
    # with tensors, so the next iteration's ``append`` failed.
    batch_size = len(input_batch)
    input_batch = torch.tensor(np.array(input_batch), dtype=torch.float32)
    target_batch = torch.tensor(target_batch, dtype=torch.long)
    hidden = torch.zeros(1,batch_size,hidden_size)

    return input_batch,target_batch,batch_size,hidden
    
#test
# sentences = ["i like dog", "i love coffee", "i hate milk", "dog hate milk"]
# word_dict = {"i": 0, "like": 1, "love": 2, "hate": 3, "dog": 4, "coffee": 5, "milk": 6}
# worddict_len = len(word_dict)
# input_batch,target_batch,batch_size,hidden = make_batch(sentences,word_dict,worddict_len,hidden_size=5)
# input_batch = torch.Tensor(input_batch)
# target_batch = torch.Tensor(target_batch)
# print(input_batch.shape)

In [None]:
# Toy corpus: the first two words of each sentence are the input and the last
# word is the prediction target.
sentences = ["i like dog", "i love coffee", "i hate milk", "dog love i","dog hate milk"]
word_dict = {"i": 0, "like": 1, "love": 2, "hate": 3, "dog": 4, "coffee": 5, "milk": 6}
worddict_len = len(word_dict)

# The output-size argument of TextRNN is named ``worddict_len``; passing it as
# ``n_class`` raised a TypeError.
model = TextRNN(input_size=worddict_len,hidden_size=5,worddict_len=worddict_len)

# Convert the raw sentences into model-ready tensors.
input_batch,target_batch,batch_size,hidden = make_batch(
    sentences,word_dict,worddict_len,hidden_size=model.hidden_size
)

# Training hyper-parameters, loss and optimizer.
epochs = 100
lr = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=lr)

In [None]:
# Train the model: one full-batch optimisation step per epoch.
# Training mode is switched on once up front; nothing in the loop changes it.
model.train()
for epoch in range(epochs):
    # Clear gradients left over from the previous step.
    optimizer.zero_grad()

    # Forward pass and loss on the whole batch.
    result = model(input_batch,hidden)
    loss = criterion(result,target_batch)

    # Backward pass and parameter update.
    loss.backward()
    optimizer.step()

    # ``loss`` still holds the value computed before the update.
    print('Epoch: {}, Loss: {:.4f}'.format(epoch, loss.item()))

In [None]:
# Test the model on a single sentence.
model.eval()
test_data = ["i like dog"]

# make_batch needs the hidden size and returns four values. Test-specific
# names keep the training tensors (target_batch, hidden) intact so the
# training cell can be re-run afterwards.
test_batch,test_target,test_batch_size,test_hidden = make_batch(
    test_data,word_dict,worddict_len,hidden_size=model.hidden_size
)

# Inference only: no gradients are needed.
with torch.no_grad():
    result = model(test_batch,test_hidden)
print(result)

# Index of the highest score for each sentence in the batch.
predict = torch.max(result,1)[1]
print(predict)

