In [117]:
from __future__ import unicode_literals, print_function, division

# import basic lib
import re
import math
import random
import string
import unicodedata
import numpy as np
from io import open

# import pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch import optim
from torch.autograd import Variable
from torch.nn.utils import clip_grad_norm_

# import helper function
import masked_cross_entropy
from infer_eval import bleu

# check device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [118]:
SOS_idx = 0
EOS_idx = 1
UNK_idx = 2
PAD_idx = 3

USE_CUDA = True

class Preprocessor:
    '''
    class for preprocessing
    '''
    def __init__(self, name):
        '''
        initialize vocab and counter
        '''
        self.name = name
        self.w2idx = {"<sos>" : 0, "<eos>" : 1, "<unk>" : 2, "<pad>" : 3}
        self.counter = {}
        self.idx2w = {0: "<sos>", 1: "<eos>", 2:"<unk>", 3:"<pad>"}
        self.num = 4

    def SentenceAdder(self, sentence):
        '''
        Add a sentence to dataset
        '''
        for word in sentence.split(' '):
            self.WordAdder(word)

    def WordAdder(self, word):
        '''
        Add single word to dataset and update vocab and counter
        '''
        if word in self.w2idx:
            self.counter[word] += 1
        else:
            self.w2idx[word] = self.num
            self.counter[word] = 1
            self.idx2w[self.num] = word
            self.num += 1
            
    def trim(self, min_count=1):
        '''
        Trim to remove non-frequent word
        '''
        keep = []
        for k, v in self.counter.items():
            if v >= min_count: keep.append(k)
        print(self.name+':')
        print('Total words', len(self.w2idx))
        print('After Trimming', len(keep))
        print('Keep Ratio %', 100 * len(keep) / len(self.w2idx))
        self.w2idx = {"<sos>" : 0, "<eos>" : 1, "<unk>" : 2, "<pad>" : 3}
        self.counter = {}
        self.idx2w = {0: "<sos>", 1: "<eos>", 2:"<unk>", 3:"<pad>"}
        self.num = 4
        for w in keep:
            self.WordAdder(w)

In [119]:
def Uni2Ascii(s):
    '''
    transfer from unicode to ascii
    '''
    return ''.join(c for c in unicodedata.normalize('NFD', s)
                    if unicodedata.category(c) != 'Mn')

def StrCleaner(s):
    '''
    trim, delete non-letter and lowercase string
    '''
    s = Uni2Ascii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s

def DataReader(path, lang1, lang2, reverse=False):
    print("Reading lines...")

    # Read the file and split into lines
    lines = open(path, encoding='utf-8').\
        read().strip().split('\n')

    # Split every line into pairs and normalize
    #pairs = [[StrCleaner(s) for s in l.split('<------>')] for l in lines]
    pairs = [[s.lower() for s in l.split('<------>')] for l in lines]

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Preprocessor(lang2)
        output_lang = Preprocessor(lang1)
    else:
        input_lang = Preprocessor(lang1)
        output_lang = Preprocessor(lang2)

    return input_lang, output_lang, pairs

In [120]:
MIN_LENGTH = 3
MAX_LENGTH = 50

def filterPair(p):
    '''
    Filter to get expected pairs with specific length
    '''
    return MIN_LENGTH <= len(p[0].split(' ')) <= MAX_LENGTH and \
        MIN_LENGTH <= len(p[1].split(' ')) < MAX_LENGTH

def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

In [121]:
def prepareData(path, lang1, lang2, reverse=True):
    input_lang, output_lang, pairs = DataReader(path, lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.SentenceAdder(pair[0])
        output_lang.SentenceAdder(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.num)
    print(output_lang.name, output_lang.num)
    return input_lang, output_lang, pairs


src, tgt, pairs = prepareData('data/train.txt', 'english', 'chinese')
src.trim()
tgt.trim()
print(random.choice(pairs))

Reading lines...
Read 90000 sentence pairs
Trimmed to 77083 sentence pairs
Counting words...
Counted words:
chinese 46064
english 30499
chinese:
Total words 46064
After Trimming 46060
Keep Ratio % 99.99131642931573
english:
Total words 30499
After Trimming 30495
Keep Ratio % 99.9868848158956
['现在 非但 没有 这样的 迹像 , 反而 还有 进一步 扩大 的 趋势 .', 'now there are no such signs ; instead , the size of arms sales tends to further expand .']


In [122]:
test_src, test_tgt, test_pairs = prepareData('data/test.txt', 'english', 'chinese')
test_src.trim()
test_tgt.trim()
print(random.choice(test_pairs))
test_src.w2idx, test_src.idx2w, test_src.num = src.w2idx, src.idx2w, src.num
test_tgt.w2idx, test_tgt.idx2w, test_tgt.num = tgt.w2idx, tgt.idx2w, tgt.num
test_pairs.sort(key=lambda x: len(x[0].split()))

Reading lines...
Read 10000 sentence pairs
Trimmed to 8572 sentence pairs
Counting words...
Counted words:
chinese 17294
english 13049
chinese:
Total words 17294
After Trimming 17290
Keep Ratio % 99.9768705909564
english:
Total words 13049
After Trimming 13045
Keep Ratio % 99.96934631006208
['不但 要 准备 充足 的 弹药 , 油料 , 装备 , 器材 , 备件 , 给养 等 , 还需要 准备 大量 的 登陆 输送 工具 .', 'apart from preparing sufficient ammunition , fuel , equipment , hardware , spare parts , provisions , and so on , it is also necessary to prepare a great amount of landing transportation means .']


In [123]:
def sentence2idx(preprocessor, sentence):
    '''
    Read sentence and translate into word index plus eos
    '''
    return [SOS_idx] + [preprocessor.w2idx[w] if w in preprocessor.w2idx \
            else UNK_idx for w in sentence.split(' ')] + [EOS_idx]

def pad(seq, max_len):
    '''
    Add padding to sentence with different length
    '''
    seq += [PAD_idx for i in range(max_len - len(seq))]
    return seq

def random_batch(src, tgt, pairs, batch_size, batch_idx):
    '''
    Randomly generate batch data
    '''
    inputs, target = [], []
    
    # Choose batch
    for s in pairs[batch_idx*batch_size:(batch_idx+1)*batch_size]:
        inputs.append(sentence2idx(src, s[0]))
        target.append(sentence2idx(tgt, s[1]))
        
    # Sort by length
    seq_pairs = sorted(zip(inputs, target), key=lambda p: len(p[0]), reverse=True)
    inputs, target = zip(*seq_pairs)
    
    # Obtain length of each sentence and pad
    input_lens = [len(s) for s in inputs]
    input_max = max(input_lens)
    input_padded = [pad(s, input_max) for s in inputs]
    target_lens = [len(s) for s in target]
    target_max = max(target_lens)
    target_padded = [pad(s, target_max) for s in target]

    # Create Variable
    if USE_CUDA:
        input_vars = Variable(torch.LongTensor(input_padded).cuda()).transpose(0, 1)
        input_lens = Variable(torch.LongTensor(input_lens).cuda())
        target_vars = Variable(torch.LongTensor(target_padded).cuda()).transpose(0, 1)
        target_lens = Variable(torch.LongTensor(target_lens).cuda())
    else:
        input_vars = Variable(torch.LongTensor(input_padded)).transpose(0, 1)
        input_lens = Variable(torch.LongTensor(input_lens))
        target_vars = Variable(torch.LongTensor(target_padded)).transpose(0, 1)
        target_lens = Variable(torch.LongTensor(target_lens))

    return input_vars, input_lens, target_vars, target_lens

def user_input(inputs, src):
    inp_list = [sentence2idx(src, inputs)]
    input_lens = [len(s) for s in inp_list]
    input_max = max(input_lens)
    input_padded = [pad(s, input_max) for s in inp_list]
    if USE_CUDA:
        input_vars = Variable(torch.LongTensor(input_padded).cuda()).transpose(0, 1)
        input_lens = Variable(torch.LongTensor(input_lens).cuda())
    else:
        input_vars = Variable(torch.LongTensor(input_padded)).transpose(0, 1)
        input_lens = Variable(torch.LongTensor(input_lens))
    return input_vars, input_lens

In [124]:
class Encoder(nn.Module):
    '''
    Define encoder and forward process
    '''
    def __init__(self, dim_input, dim_embed, dim_hidden, num_layers, dropout):
        super(Encoder, self).__init__()
        self.dim_input = dim_input
        self.dim_hidden = dim_hidden
        self.dim_embed = dim_embed
        self.embed = nn.Embedding(dim_input, dim_embed)
        self.cell = nn.GRU(dim_embed, dim_hidden, 
                          num_layers, dropout=dropout, 
                          bidirectional=True)
        
    def init_hidden(self):
        if USE_CUDA:
            return Variable(torch.zeros(self.n_layers, 1, self.hidden_size).cuda())
        else:
            return Variable(torch.zeros(self.n_layers, 1, self.hidden_size))
        
    def forward(self, inputs, inputs_lens, hidden=None):
        '''
        We need to sum the outputs since bi-diretional is used
        '''
        embedded = self.embed(inputs)
        packed = nn.utils.rnn.pack_padded_sequence(embedded, inputs_lens)
        outputs, hidden = self.cell(packed, hidden)
        outputs, output_lengths = nn.utils.rnn.pad_packed_sequence(outputs)
        outputs = outputs[:, :, :self.dim_hidden] + \
                    outputs[:, :, self.dim_hidden:]
        return outputs, hidden


class Attention(nn.Module):
    '''
    Define attention mechanism
    '''
    def __init__(self, dim_hidden):
        super(Attention, self).__init__()
        self.dim_hidden = dim_hidden
        # 2*dim_hidden is needed since bi-direction is used
        self.attn = nn.Linear(2*self.dim_hidden, dim_hidden)
        self.v = nn.Parameter(torch.rand(dim_hidden))
        stdv = 1. / math.sqrt(self.v.size(0))
        self.v.data.uniform_(-stdv, stdv)

    def forward(self, hidden, encoder_outputs):
        timestep = encoder_outputs.size(0)
        h = hidden.repeat(timestep, 1, 1).transpose(0, 1)
        encoder_outputs = encoder_outputs.transpose(0, 1)
        scores = self.score(h, encoder_outputs)
        return F.relu(scores).unsqueeze(1)

    def score(self, hidden, encoder_outputs):
        e = F.softmax(self.attn(torch.cat([hidden, encoder_outputs], 2)),dim=1)
        e = e.transpose(1, 2)
        v = self.v.repeat(encoder_outputs.size(0), 1).unsqueeze(1)
        e = torch.bmm(v, e)
        return e.squeeze(1)


class Decoder(nn.Module):
    '''
    Define decoder with attention
    '''
    def __init__(self, dim_embed, dim_hidden, dim_output, num_layers, dropout):
        super(Decoder, self).__init__()
        self.dim_embed = dim_embed
        self.dim_hidden = dim_hidden
        self.dim_output = dim_output
        self.num_layers = num_layers

        self.embed = nn.Embedding(dim_output, dim_embed)
        self.dropout = nn.Dropout(dropout, inplace=True)
        self.attention = Attention(dim_hidden)
        self.cell = nn.GRU(dim_hidden + dim_embed, dim_hidden,
                          num_layers, dropout=dropout)
        self.out = nn.Linear(2*dim_hidden, dim_output)

    def forward(self, inputs, last_hidden, encoder_outputs):
        
        embedded = self.embed(inputs).unsqueeze(0)  # (1,B,N)
        embedded = self.dropout(embedded)
        
        attn_weights = self.attention(last_hidden[-1], encoder_outputs)
        context = attn_weights.bmm(encoder_outputs.transpose(0, 1))  # (B,1,N)
        context = context.transpose(0, 1)  # (1,B,N)
        
        rnn_input = torch.cat([embedded, context], 2)
        output, hidden = self.cell(rnn_input, last_hidden)
        output = output.squeeze(0)  # (1,B,N) -> (B,N)
        context = context.squeeze(0)
        output = self.out(torch.cat([output, context], 1))
        output = F.log_softmax(output, dim=1)
        return output, hidden, attn_weights

class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, src, src_len, tgt, tgt_len, teacher_forcing_ratio=0.5):
        batch_size = src.size(1)
        max_len = tgt.size(0)
        vocab_size = self.decoder.dim_output
        if USE_CUDA:
            outputs = Variable(torch.zeros(max_len, batch_size, vocab_size).cuda())
        else:
            outputs = Variable(torch.zeros(max_len, batch_size, vocab_size))
        encoder_output, hidden = self.encoder(src, src_len)
        hidden = hidden[:self.decoder.num_layers]
        # Put <sos> at first position
        if USE_CUDA:
            output = Variable(tgt.data[0, :].cuda())
        else:
            output = Variable(tgt.data[0, :])
        for t in range(1, max_len):
            output, hidden, attn_weights = self.decoder(
                    output, hidden, encoder_output)
            outputs[t] = output
            # Randomly choose whether to use teacher force or not
            is_teacher = random.random() < teacher_forcing_ratio
            top1 = output.data.max(1)[1]
            if USE_CUDA:
                output = Variable(tgt.data[t].cuda() if is_teacher else top1.cuda())
            else:
                output = Variable(tgt.data[t] if is_teacher else top1)
        return outputs
    
    def inference(self, src, src_len, max_len = MAX_LENGTH):
        pred_idx = []
        batch_size = src.size(1)
        vocab_size = self.decoder.dim_output
        if USE_CUDA:
            outputs = Variable(torch.zeros(max_len, batch_size, vocab_size).cuda())
        else:
            outputs = Variable(torch.zeros(max_len, batch_size, vocab_size))
        
        encoder_output, hidden = self.encoder(src, src_len, None)
        hidden = hidden[:self.decoder.num_layers]
        # Put <sos> at first position
        if USE_CUDA:
            output = Variable(src.data[0, :].cuda())
        else:
            output = Variable(src.data[0, :])
        pred_idx.append(SOS_idx)
        for t in range(1, max_len):
            output, hidden, attn_weights = self.decoder(
                    output, hidden, encoder_output)
            outputs[t] = output
            top1 = output.data.max(1)[1]
            pred_idx.append(top1.item())
            if USE_CUDA:
                output = Variable(top1.cuda())
            else:
                output = Variable(top1)
            if top1 == EOS_idx: break
        return outputs, pred_idx

In [125]:
batch_size = 64
hidden_size = 512
embed_size = 256
encoder_n_layers = 2
decoder_n_layers = 1
encoder_test = Encoder(src.num, embed_size, hidden_size, encoder_n_layers, dropout=0.2)
decoder_test = Decoder(embed_size, hidden_size, tgt.num, decoder_n_layers, dropout=0.2)

  "num_layers={}".format(dropout, num_layers))


In [126]:
net = Seq2Seq(encoder_test,decoder_test).cuda()
print(net)

Seq2Seq(
  (encoder): Encoder(
    (embed): Embedding(46064, 256)
    (cell): GRU(256, 512, num_layers=2, dropout=0.2, bidirectional=True)
  )
  (decoder): Decoder(
    (embed): Embedding(30499, 256)
    (dropout): Dropout(p=0.2, inplace)
    (attention): Attention(
      (attn): Linear(in_features=1024, out_features=512, bias=True)
    )
    (cell): GRU(768, 512, dropout=0.2)
    (out): Linear(in_features=1024, out_features=30499, bias=True)
  )
)


In [127]:
opt = optim.Adam(net.parameters(),lr=0.0001)

In [128]:
net.load_state_dict(torch.load('./saved_model_1.pt'))

In [None]:
net.train()

grad_clip = 5
num_batch = len(pairs) // batch_size
print_every_batches = 200
save_every_batches = 200
valid_every_epochs = print_every_batches
pairs.sort(key=lambda x: len(x[0].split()))

for epoch in range(1,50000):
    total_loss = 0
    tmp_loss = 0
    for batch_idx in range(num_batch):
        input_batches, input_lengths,\
            target_batches, target_lengths = random_batch(src,tgt,pairs,batch_size,batch_idx)
        
        opt.zero_grad()
        output = net(input_batches, input_lengths, target_batches, target_lengths)

        #loss = masked_cross_entropy.compute_loss(
        #    output.transpose(0, 1).contiguous(),
        #    target_batches.transpose(0, 1).contiguous(),
        #    target_lengths
        #)
        loss = F.nll_loss(output[1:].view(-1,tgt.num),
                          target_batches[1:].contiguous().view(-1),
                          ignore_index=PAD_idx)
        
        tmp_loss += loss.item()
        if (batch_idx+1) % save_every_batches == 0:
            torch.save(net.state_dict(), './saved.pt')
        clip_grad_norm_(net.parameters(), grad_clip)
        loss.backward()
        opt.step()
        if (batch_idx+1) % print_every_batches == 0:
            opt.zero_grad()
            input_batches, input_lengths,\
                target_batches, target_lengths = random_batch(src,tgt,pairs,batch_size,batch_idx)
            for test_idx in range(1):
                _, pred = net.inference(input_batches[:,test_idx].reshape(input_lengths[0].item(),1),input_lengths[0].reshape(1))
                inp = ' '.join([src.idx2w[t] for t in input_batches[:,test_idx].cpu().numpy()])
                mt = ' '.join([tgt.idx2w[t] for t in pred if t!= PAD_idx])
                ref = ' '.join([tgt.idx2w[t] for t in target_batches[:,test_idx].cpu().numpy()])
                print('INPUT:\n' + inp)
                print('REF:\n' + ref)
                print('PREDICTION:\n' + mt)
                print('BLEU = %f' % bleu([mt],[[ref]],4))
                print("------")
            output = net(input_batches, input_lengths, target_batches, target_lengths)

            #loss = masked_cross_entropy.compute_loss(
            #    output.transpose(0, 1).contiguous(),
            #    target_batches.transpose(0, 1).contiguous(),
            #    target_lengths
            #)
            loss = F.nll_loss(output[1:].view(-1,tgt.num),
                              target_batches[1:].contiguous().view(-1),
                              ignore_index=PAD_idx)
            print("Epoch %d Batch Num %d Train Loss: %f Test Loss: %f"%(epoch, batch_idx+1, tmp_loss/print_every_batches, loss.item()))
            tmp_loss = 0

    print('epoch %d finished !'%(epoch))
    random.shuffle(pairs)

# Check Inference result of train data

In [141]:
pairs.sort(key=lambda x: len(x[0].split()))

In [142]:
from infer_eval import bleu

net.eval()
test_range = 50
ave_bleu = 0
for test_idx in range(50000,50100):
    input_batches, input_lengths,\
        target_batches, target_lengths = random_batch(src,tgt,pairs,1,test_idx)
    _, pred = net.inference(input_batches[:,0].reshape(input_lengths[0].item(),1),input_lengths[0].reshape(1))
    inp = ' '.join([src.idx2w[t] for t in input_batches[:,0].cpu().numpy()])
    mt = ' '.join([tgt.idx2w[t] for t in pred if t!= PAD_idx])
    ref = ' '.join([tgt.idx2w[t] for t in target_batches[:,0].cpu().numpy() if t != PAD_idx])
    print('INPUT:\n' + inp)
    print('REF:\n' + ref)
    print('PREDICTION:\n' + mt)
    tmp_score = bleu([mt],[[ref]],4)
    ave_bleu += tmp_score
    print('BLEU = %f' % tmp_score)
    print("------")
print('Average BLEU = '+str(ave_bleu/test_range))

INPUT:
<sos> 当前 , 要 着力 抓好 以下 几 方面 的 工作 . 一 是 要 加快 建立 现代 企业 制度 , 转换 企业 经营 机制 . <eos>
REF:
<sos> at present it is necessary to focus on doing a good job in the following respects : first , speed up the establishment of a modern enterprise system and change the enterprises ' operational mechanism . <eos>
PREDICTION:
<sos> at present it is necessary to focus on doing a good job in respects following respects : first , speed up the establishment of a modern enterprise system and change the enterprises ' operational mechanism . <eos>
BLEU = 0.929924
------
INPUT:
<sos> 此外 , 即便 是 已 公布 于 联邦 公报 但 尚未 生效 的 法令 , 布什 也 下达 了 为期 60日 的 暂缓 令 . <eos>
REF:
<sos> in addition , with respect to those laws that have been published in the federal register but have not yet taken effect , bush also issued order to postpone the effective date of these laws by 60 days . <eos>
PREDICTION:
<sos> in addition , the laws of those laws have been published in effect , but federal register have been issued by bush , but a

INPUT:
<sos> 江泽民 说 : " 今天 , 我 和 普京 总统 共同 签署 了 具有 里程碑 意义 的 《 中俄 睦邻 友好 合作 条约 》 . <eos>
REF:
<sos> jiang zemin said : " today , president putin and i signed the ' chinese - russian good - neighborly treaty of friendship and cooperation , ' which has the significance of a milestone . <eos>
PREDICTION:
<sos> jiang zemin said , " today , president putin and i signed the sino - russian treaty ' good - neighborly and friendship and cooperation of the chinese of which ' which is the milestone . <eos>
BLEU = 0.415239
------
INPUT:
<sos> 他 说 , 农德孟 主席 在 北京 和 纽约 都 曾 邀请 过 我 , 我 期待 著 有机会 再次 访问 友好 邻邦 越南 . <eos>
REF:
<sos> he said : chairman nong duc manh had invited me both in beijing and in new york , and i am looking forward to an opportunity to visit our good neighbor vietnam once again . <eos>
PREDICTION:
<sos> he said : chairman nong duc manh had invited me in beijing and both new york and had once again to me forward an opportunity to visit vietnam neighbor in vietnam again . <eos>
BLEU = 0.4570

INPUT:
<sos> 对 邪教 组织 的 仁慈 , 就是 对 人民 生命 安全 和 社会 稳定 的 淡漠 , 也是 对 人类 正义 事业 的 犯罪 . <eos>
REF:
<sos> leniency to cults means indifference to the people 's lives and to social stability , which is also tantamount to committing a crime to mankind 's just cause . <eos>
PREDICTION:
<sos> leniency to cults to indifference to people 's lives , which is tantamount to committing to committing crime 's crime 's which is just a crime 's social stability . <eos>
BLEU = 0.208308
------
INPUT:
<sos> 既 发扬 民族 优秀 文化 传统 , 又 博采 世界 各国 文明 之 长 , 同时 坚决 抵制 各种 腐朽 思想 文化 的 侵蚀 . <eos>
REF:
<sos> in so doing , we have carried forward our nation 's fine cultural heritage and benefited from different civilizations of the world . at the same time , we have resolutely resisted all kinds of corrosive , decadent ideologies and cultures . <eos>
PREDICTION:
<sos> in doing so , we have carried forward the nation 's fine heritage of fine civilizations and benefited from the world at all same time , we have resolutely resisted al

INPUT:
<sos> 目前 , 全球 直接 投资 的 一半 以上 是 以 跨国 企业 并购 形式 进行 的 , 并且 90% 以上 发生 在 发达国家 之间 . <eos>
REF:
<sos> now , more than half of direct global investments have been made in the form of merger and acquisition by transnational enterprises , and over 90 percent of such mergers and acquisitions have taken place in developed countries . <eos>
PREDICTION:
<sos> now , more than half of direct investments in the have taken place in direct transnational merger and acquisition of transnational investment by transnational countries in such form have acquisitions have taken place in acquisitions in such mergers acquisitions . <eos>
BLEU = 0.336664
------
INPUT:
<sos> " 香港 法轮 佛学 会 " 的 活动 反华 反 中央 , 危害 国家 安全 , 损害 港人 社会 , 已 事实 俱 在 . <eos>
REF:
<sos> the facts are all there to prove that activities of the hong kong falun buddhist society are against china and the central authorities , and they are detrimental to our national security as well as hong kong 's society . <eos>
PREDICTION:
<sos> the facts are al

INPUT:
<sos> 要 尽量 多 征集 文化 素质 高 和 具有 军 地 通用 专业 技术 特长 的 青年 入伍 , 保证 部队 建设 的 需要 . <eos>
REF:
<sos> it is necessary to recruit as many as possible young people who have better education and professional skills useful for military as well as local development purposes so as to meet the construction needs of various pla units . <eos>
PREDICTION:
<sos> it is necessary to recruit as many as possible as possible as possible for young people who have better skills useful skills as well as local purposes as local pla units and as to meet various needs of various pla units . <eos>
BLEU = 0.513259
------
INPUT:
<sos> 例如 , 资讯 科技 的 发展 日新月异 , 往往 是 政策 上 落后 一 年 , 等 於 在 生产力 上 落后 五 年 . <eos>
REF:
<sos> take information technology for example . information technology changes with each passing day . a year of delay in policymaking often means five years of delay in productivity . <eos>
PREDICTION:
<sos> take information technology for example for example of information technology technology often change in e

INPUT:
<sos> 第五 , 在 整个 税收 优惠 政策 调整 的 过程 中 , 应 尽可能 保持 原有 的 税收 规模 不变 , 或 有所 扩大 . <eos>
REF:
<sos> fifth , in the entire course of adjusting the tax preferential policies , the original tax scale should be kept intact as much as possible or even expanded to some extent . <eos>
PREDICTION:
<sos> fifth , in the entire course of adjusting the original tax scale , the original tax should be kept intact as much as possible or even expanded some extent . <eos>
BLEU = 0.732582
------
INPUT:
<sos> " 微笑 列车 " 自 去年 起 与 中华 慈善 总会 合作 , 已经 成功地 为 2000 中国 唇 腭裂 患儿 进行 了 手术 . <eos>
REF:
<sos> since its cooperation with the china charity council beginning last year , the " smiling train " has successfully carried out operations for over 2,000 chinese children with harelips . <eos>
PREDICTION:
<sos> since the train charity with the charity charity train last year , " china has successfully carried out its operations over over 2,000 chinese children with harelips with harelips . <eos>
BLEU = 0.371168
------
INP

INPUT:
<sos> 越南 共产党 , 越南 国会 和 越南 政府 对 中国 共产党 , 中国 全国 人大 和 中国 政府 给予 的 巨大 支持 和 帮助 永志不忘 . <eos>
REF:
<sos> the vietnamese communist party , the vietnamese national assembly and the vietnamese government will never forget the tremendous support and help extended to them by the cpc , the chinese npc and the chinese government . <eos>
PREDICTION:
<sos> the vietnamese communist party and the vietnamese national assembly , the tremendous help and the tremendous support and the chinese government will help them to help forget the tremendous support and the chinese by the cpc . <eos>
BLEU = 0.395674
------
INPUT:
<sos> 让 我们 齐心协力 , 迎 著 地平线 上 崭露 的 新世纪 的 曙光 , 把 中美 关系 的 航船 驶 向 充满 希望 的 未来 . <eos>
REF:
<sos> let us with one heart and one mind , steer the ship of sino - us relationship toward a future that is full of hope against the backdrop of the ray of dawn of the new century that arises over the horizon . <eos>
PREDICTION:
<sos> let us heart and one mind , steer the ship of sino - us relationship 

# Check inference result of test data

In [143]:
test_src, test_tgt, test_pairs = prepareData('data/test.txt', 'english', 'chinese')
test_src.trim()
test_tgt.trim()
print(random.choice(test_pairs))

Reading lines...
Read 10000 sentence pairs
Trimmed to 8572 sentence pairs
Counting words...
Counted words:
chinese 17294
english 13049
chinese:
Total words 17294
After Trimming 17290
Keep Ratio % 99.9768705909564
english:
Total words 13049
After Trimming 13045
Keep Ratio % 99.96934631006208
['新华社 伊斯兰堡 4月20日 电 ( 新华社 记者 杜振丰 人民 日报 记者 丁子 ) 巴基斯坦 总统 拉菲克 . 塔拉尔 和 首席执行官 佩尔韦兹 . 穆沙拉夫 20日 分别 会见 了 正在 此间 访问 的 中央军委 委员 , 中国人民 解放军 总参谋长 傅全 有 .', "islamabad , 20 april ( xinhua ) - pakistani president rafiq tarar and chief executive pervez musharraf on 20 april met fu quanyou , member of the central military commission and chief of general staff of the chinese people 's liberation army [ pla ] respectively ."]


In [144]:
test_src.w2idx, test_src.idx2w, test_src.num = src.w2idx, src.idx2w, src.num
test_tgt.w2idx, test_tgt.idx2w, test_tgt.num = tgt.w2idx, tgt.idx2w, tgt.num
test_pairs.sort(key=lambda x: len(x[0].split()))

In [145]:
from infer_eval import bleu

net.eval()
test_range = 50
ave_bleu = 0
for test_idx in range(test_range):
    input_batches, input_lengths,\
        target_batches, target_lengths = random_batch(src,tgt,test_pairs,1,test_idx)
    _, pred = net.inference(input_batches[:,0].reshape(input_lengths[0].item(),1),input_lengths[0].reshape(1))
    inp = ' '.join([test_src.idx2w[t] for t in input_batches[:,0].cpu().numpy()])
    mt = ' '.join([test_tgt.idx2w[t] for t in pred if t!= PAD_idx])
    ref = ' '.join([test_tgt.idx2w[t] for t in target_batches[:,0].cpu().numpy() if t != PAD_idx])
    print('INPUT:\n' + inp)
    print('REF:\n' + ref)
    print('PREDICTION:\n' + mt)
    tmp_score = bleu([mt],[[ref]],4)
    ave_bleu += tmp_score
    print('BLEU = %f' % tmp_score)
    print("------")
print('Average BLEU = '+str(ave_bleu/test_range))

INPUT:
<sos> 时光 <unk> . <eos>
REF:
<sos> time really flies like a shuttle . <eos>
PREDICTION:
<sos> as a matter of fact . <eos>
BLEU = 0.456227
------
INPUT:
<sos> 分担 经费 . <eos>
REF:
<sos> sharing the expense . <eos>
PREDICTION:
<sos> there funds . <eos>
BLEU = 0.509523
------
INPUT:
<sos> 中国 ! " <eos>
REF:
<sos> china ! " <eos>
PREDICTION:
<sos> " china ! " <eos>
BLEU = 0.537285
------
INPUT:
<sos> 提高 素质 迫在眉睫 <eos>
REF:
<sos> this is the only way to succeed . <eos>
PREDICTION:
<sos> raising raising quality quality of raising the quality ? <eos>
BLEU = 0.722657
------
INPUT:
<sos> 权责 一体 . <eos>
REF:
<sos> integration of right and responsibility . <eos>
PREDICTION:
<sos> integration are . <eos>
BLEU = 0.436454
------
INPUT:
<sos> 显然 不是 . <eos>
REF:
<sos> obvious not . <eos>
PREDICTION:
<sos> obviously , not is a . <eos>
BLEU = 0.516973
------
INPUT:
<sos> 你们 一定 能行 ! <eos>
REF:
<sos> you surely can do it ! <eos>
PREDICTION:
<sos> you you you you you ! <eos>
BLEU = 0.614788
------
INPUT:


# User Input Translation

In [139]:
inputs = '今天 我 要 上学 .'
user_var, user_len = user_input(inputs, src)
net.eval()
_, pred = net.inference(user_var[:,0].reshape(user_len[0].item(),1),user_len[0].reshape(1))
inp = ' '.join([src.idx2w[t] for t in user_var[:,0].cpu().numpy()])
mt = ' '.join([tgt.idx2w[t] for t in pred if t!= PAD_idx])
print('INPUT:\n' + inp)
print('PREDICTION:\n' + mt)

INPUT:
<sos> 今天 我 要 上学 . <eos>
PREDICTION:
<sos> today , we should deeply in today . <eos>
