In [1]:
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

# 使用GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SOS_token = 0
EOS_token = 1

In [2]:
# 主要用于储存单词与id的映射
class Vocabulary(object):
    def __init__(self):
        self.word2idx = {}
        self.idx2word = {0: "<SOS>", 1: "<EOS>", 2: "<unk>"}
        self.idx = 3 # Count SOS and EOS

    # 记录word和id之间的映射
    def add_word(self, word):
        if not word in self.word2idx:
            self.word2idx[word] = self.idx
            self.idx2word[self.idx] = word
            self.idx += 1
            
    # 将句子进行分词，添加每个单词与id的映射
    def add_sentence(self, sentence):
        for word in sentence.split():
            self.add_word(word)
    
    # 得到某个单词的id
    def __call__(self, word):
        if not word in self.word2idx:
            return 2
        return self.word2idx[word]
    
    # vaocabulary的容量
    def __len__(self):
        return self.idx

In [3]:
class EncoderRNN(nn.Module):
    # 在构造函数内定义了一个Embedding层和一GRU层，
    def __init__(self, word_size, embedding_dim):
        super(EncoderRNN, self).__init__()
        
        # 定义需要的变量
        self.gru_hidden_size = 4
        self.gru_num_layers = 1
        self.embedding_dim = embedding_dim
        self.gru_h0 = self.initHidden()

        
        # 定义网络
        self.embedding = nn.Embedding(word_size, embedding_dim)
        self.gru = nn.GRU(embedding_dim, self.gru_hidden_size)

    # 前向传播
    def forward(self, x, h):
        embedding = self.embedding(x)
        embedding = embedding.view(1, 1, self.embedding_dim)
        _, h = self.gru(embedding, h)
        return h
    
    # 最终执行函数
    def sample(self,seq_list):
        word_list = torch.LongTensor(seq_list).to(device)
        h = self.gru_h0
        for word_tensor in word_list:
            h = self(word_tensor, h)
        return h

    # 初始化第一层的h0，随机生成一个
    def initHidden(self):
        tensor = torch.Tensor(self.gru_num_layers, 1, self.gru_hidden_size)
        return nn.init.normal_(tensor)

class DecoderRNN(nn.Module):
    def __init__(self, word_size, embedding_dim):
        super(DecoderRNN, self).__init__()
        
        # 定义需要的变量
        self.gru_hidden_size = 4
        self.embedding_dim = embedding_dim
        self.maxlen = 10
        
        # 定义网络
        self.embedding = nn.Embedding(word_size, embedding_dim)
        self.gru = nn.GRU(embedding_dim, self.gru_hidden_size)
        self.linear = nn.Linear(self.gru_hidden_size, word_size)
        self.softmax = nn.LogSoftmax(dim=1)
        
        
    def forward(self, x, s):
        embedding = self.embedding(x)
        embedding = embedding.view(1, 1, self.embedding_dim)
        embedding = F.relu(embedding)
        gru_output, h = self.gru(embedding, s)
        output = self.linear(gru_output[0])
        output = self.softmax(output)
        return output, h

    # pre_hidden即公式中所谓的固定C向量
    def sample(self, pre_hidden):
        inputs = torch.LongTensor([SOS_token]).to(device)
        h = pre_hidden
        res = [SOS_token]
        for i in range(self.maxlen):
            output, h = self(inputs, h)
            top_value, top_index = output.topk(1)
            if top_index.item() == EOS_token:
                res.append(top_index.item())
                break
            else:
                res.append(top_index.item())
            inputs = top_index.squeeze().detach()
            
        return res

In [4]:
# 处理句子，将句子转换成Tensor
def sentence2tensor(lang, sentence):
    indexes = [lang(word) for word in sentence.split()]
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)

# 将(input, target)的pair都转换成Tensor
def pair2tensor(pair):
    input_tensor = sentence2tensor(lan1, pair[0])
    target_tensor = sentence2tensor(lan2, pair[1])
    return (input_tensor, target_tensor)

# 定义句子和Vocabulary类
lan1 = Vocabulary()
lan2 = Vocabulary()

data = [['Hi .', '嗨 。'],
        ['Hi .', '你 好 。'],
        ['Run .', '跑'],
        ['Wait !', '等等 ！'],
        ['Hello !', '你好 。'],
        ['I try .', '让 我 来 。'],
        ['I won !', '我 赢 了 。'],
        ['I am OK .', '我 沒事 。']]

for i,j in data:
    lan1.add_sentence(i)
    lan2.add_sentence(j)
print(len(lan1))
print(len(lan2))

# 定义Encoder和Decoder以及训练的一些参数
import random
learning_rate = 0.001
hidden_size = 256

# 将Encoder, Decoder放到GPU
encoder = EncoderRNN(len(lan1), hidden_size).to(device)
decoder = DecoderRNN(len(lan2), hidden_size).to(device)
# 网络参数 = Encoder参数 + Decoder参数
params = list(encoder.parameters()) + list(decoder.parameters())
# 定义优化器
optimizer = optim.Adam(params, lr=learning_rate)
loss = 0
# NLLLoss = Negative Log Likelihood Loss
criterion = nn.NLLLoss()
# 一共训练多次轮
turns = 200
print_every = 20
print_loss_total = 0
# 将数据random choice，然后转换成 Tensor
training_pairs = [pair2tensor(random.choice(data)) for pair in range(turns)]

14
17


In [5]:
training_pairs

[(tensor([[6],
          [7],
          [1]]),
  tensor([[8],
          [9],
          [1]])),
 (tensor([[ 9],
          [11],
          [ 7],
          [ 1]]),
  tensor([[12],
          [14],
          [15],
          [ 4],
          [ 1]])),
 (tensor([[ 9],
          [12],
          [13],
          [ 4],
          [ 1]]),
  tensor([[12],
          [16],
          [ 4],
          [ 1]])),
 (tensor([[ 9],
          [11],
          [ 7],
          [ 1]]),
  tensor([[12],
          [14],
          [15],
          [ 4],
          [ 1]])),
 (tensor([[5],
          [4],
          [1]]),
  tensor([[7],
          [1]])),
 (tensor([[ 9],
          [12],
          [13],
          [ 4],
          [ 1]]),
  tensor([[12],
          [16],
          [ 4],
          [ 1]])),
 (tensor([[5],
          [4],
          [1]]),
  tensor([[7],
          [1]])),
 (tensor([[3],
          [4],
          [1]]),
  tensor([[5],
          [6],
          [4],
          [1]])),
 (tensor([[6],
          [7],
         

In [7]:
# 训练过程
for turn in range(turns):
    optimizer.zero_grad()
    loss = 0
    
    x, y = training_pairs[turn]
    input_length = x.size(0)
    target_length = y.size(0)
#     print(y)
    
    # 初始化Encoder中的h0
    h0 = encoder.gru_h0
    
    # 对input进行Encoder
    for i in range(input_length):
        h0 = encoder(x[i], h0)
    
    # Decoder的一个input <sos>
    decoder_input = torch.LongTensor([SOS_token]).to(device)
    
    h = h0
    for i in range(target_length):
        decoder_output, h= decoder(decoder_input, h)
        top_value, top_index = decoder_output.topk(1)
        decoder_input = top_index.squeeze().detach()
#         print(decoder_output)
#         print(y[i])
        loss += criterion(decoder_output, y[i])
        print(loss)
        if decoder_input.item() == EOS_token:break  
    print_loss_total += loss.item()/target_length
    if (turn+1) % print_every == 0 :
#         print("loss:{loss:,.4f}".format(loss=print_loss_total/print_every))
        print_loss_total = 0
        
    loss.backward()
    optimizer.step()

tensor(3.1511, grad_fn=<AddBackward0>)
tensor(6.1546, grad_fn=<AddBackward0>)
tensor(9.3204, grad_fn=<AddBackward0>)
tensor(2.2798, grad_fn=<AddBackward0>)
tensor(4.9380, grad_fn=<AddBackward0>)
tensor(7.9589, grad_fn=<AddBackward0>)
tensor(10.9707, grad_fn=<AddBackward0>)
tensor(14.2727, grad_fn=<AddBackward0>)
tensor(2.6847, grad_fn=<AddBackward0>)
tensor(5.9708, grad_fn=<AddBackward0>)
tensor(8.7699, grad_fn=<AddBackward0>)
tensor(11.3298, grad_fn=<AddBackward0>)
tensor(2.2344, grad_fn=<AddBackward0>)
tensor(4.3790, grad_fn=<AddBackward0>)
tensor(7.3715, grad_fn=<AddBackward0>)
tensor(9.8709, grad_fn=<AddBackward0>)
tensor(12.8439, grad_fn=<AddBackward0>)
tensor(3.0264, grad_fn=<AddBackward0>)
tensor(5.9113, grad_fn=<AddBackward0>)
tensor(2.6135, grad_fn=<AddBackward0>)
tensor(5.9152, grad_fn=<AddBackward0>)
tensor(8.4243, grad_fn=<AddBackward0>)
tensor(10.8893, grad_fn=<AddBackward0>)
tensor(3.0215, grad_fn=<AddBackward0>)
tensor(5.8215, grad_fn=<AddBackward0>)
tensor(2.6459, grad_

tensor(2.2088, grad_fn=<AddBackward0>)
tensor(2.6786, grad_fn=<AddBackward0>)
tensor(4.7727, grad_fn=<AddBackward0>)
tensor(8.1619, grad_fn=<AddBackward0>)
tensor(10.2858, grad_fn=<AddBackward0>)
tensor(12.6249, grad_fn=<AddBackward0>)
tensor(2.6702, grad_fn=<AddBackward0>)
tensor(4.7626, grad_fn=<AddBackward0>)
tensor(8.1481, grad_fn=<AddBackward0>)
tensor(10.2716, grad_fn=<AddBackward0>)
tensor(12.6061, grad_fn=<AddBackward0>)
tensor(2.8210, grad_fn=<AddBackward0>)
tensor(6.4528, grad_fn=<AddBackward0>)
tensor(8.2471, grad_fn=<AddBackward0>)
tensor(2.0771, grad_fn=<AddBackward0>)
tensor(4.0506, grad_fn=<AddBackward0>)
tensor(6.7107, grad_fn=<AddBackward0>)
tensor(2.8001, grad_fn=<AddBackward0>)
tensor(2.0711, grad_fn=<AddBackward0>)
tensor(4.0457, grad_fn=<AddBackward0>)
tensor(6.6993, grad_fn=<AddBackward0>)
tensor(2.6114, grad_fn=<AddBackward0>)
tensor(4.6971, grad_fn=<AddBackward0>)
tensor(8.0653, grad_fn=<AddBackward0>)
tensor(10.1850, grad_fn=<AddBackward0>)
tensor(12.5037, grad

tensor(2.0457, grad_fn=<AddBackward0>)
tensor(5.7479, grad_fn=<AddBackward0>)
tensor(7.8746, grad_fn=<AddBackward0>)
tensor(9.8934, grad_fn=<AddBackward0>)
tensor(2.7260, grad_fn=<AddBackward0>)
tensor(6.1849, grad_fn=<AddBackward0>)
tensor(8.1984, grad_fn=<AddBackward0>)
tensor(2.6451, grad_fn=<AddBackward0>)
tensor(2.0353, grad_fn=<AddBackward0>)
tensor(5.7322, grad_fn=<AddBackward0>)
tensor(7.8523, grad_fn=<AddBackward0>)
tensor(9.8617, grad_fn=<AddBackward0>)
tensor(2.6448, grad_fn=<AddBackward0>)
tensor(2.6401, grad_fn=<AddBackward0>)
tensor(2.4705, grad_fn=<AddBackward0>)
tensor(1.9288, grad_fn=<AddBackward0>)
tensor(3.9136, grad_fn=<AddBackward0>)
tensor(7.0546, grad_fn=<AddBackward0>)
tensor(9.1881, grad_fn=<AddBackward0>)
tensor(11.1599, grad_fn=<AddBackward0>)
tensor(2.4639, grad_fn=<AddBackward0>)
tensor(3.2276, grad_fn=<AddBackward0>)
tensor(5.3577, grad_fn=<AddBackward0>)
tensor(1.9282, grad_fn=<AddBackward0>)
tensor(3.9168, grad_fn=<AddBackward0>)
tensor(7.0549, grad_fn=<

In [8]:
y[i]

tensor([1])

In [10]:
# 测试函数
def translate(s):
    t = [lan1(i) for i in s.split()]
    t.append(EOS_token)
    print(t)
    f = encoder.sample(t)   # 编码
    s = decoder.sample(f)   # 解码
    r = [lan2.idx2word[i] for i in s]    # 根据id得到单词
    return ' '.join(r) # 生成句子
print(translate('are you ok .'))
# print(translate('我们 打 游戏 。'))

[2, 2, 2, 4, 1]
<SOS> 了 。 我 我 <EOS>
