In [1]:
from __future__ import unicode_literals, print_function, division

# import basic lib
import re
import math
import random
import string
import unicodedata
import numpy as np
from io import open

# import pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch import optim
from torch.autograd import Variable
from torch.nn.utils import clip_grad_norm_

# import loss func
import masked_cross_entropy

# check device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
SOS_idx = 0
EOS_idx = 1
UNK_idx = 2
PAD_idx = 3

USE_CUDA = True

class Preprocessor:
    '''
    class for preprocessing
    '''
    def __init__(self, name):
        '''
        initialize vocab and counter
        '''
        self.name = name
        self.w2idx = {"<sos>" : 0, "<eos>" : 1, "<unk>" : 2, "<pad>" : 3}
        self.counter = {}
        self.idx2w = {0: "<sos>", 1: "<eos>", 2:"<unk>", 3:"<pad>"}
        self.num = 4

    def SentenceAdder(self, sentence):
        '''
        Add a sentence to dataset
        '''
        for word in sentence.split(' '):
            self.WordAdder(word)

    def WordAdder(self, word):
        '''
        Add single word to dataset and update vocab and counter
        '''
        if word in self.w2idx:
            self.counter[word] += 1
        else:
            self.w2idx[word] = self.num
            self.counter[word] = 1
            self.idx2w[self.num] = word
            self.num += 1
            
    def trim(self, min_count=1):
        '''
        Trim to remove non-frequent word
        '''
        keep = []
        for k, v in self.counter.items():
            if v >= min_count: keep.append(k)
        print(self.name+':')
        print('Total words', len(self.w2idx))
        print('After Trimming', len(keep))
        print('Keep Ratio %', 100 * len(keep) / len(self.w2idx))
        self.w2idx = {"<sos>" : 0, "<eos>" : 1, "<unk>" : 2, "<pad>" : 3}
        self.counter = {}
        self.idx2w = {0: "<sos>", 1: "<eos>", 2:"<unk>", 3:"<pad>"}
        self.num = 4
        for w in keep:
            self.WordAdder(w)

In [3]:
def Uni2Ascii(s):
    '''
    transfer from unicode to ascii
    '''
    return ''.join(c for c in unicodedata.normalize('NFD', s)
                    if unicodedata.category(c) != 'Mn')

def StrCleaner(s):
    '''
    trim, delete non-letter and lowercase string
    '''
    s = Uni2Ascii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s

def DataReader(path, lang1, lang2, reverse=False):
    print("Reading lines...")

    # Read the file and split into lines
    lines = open(path, encoding='utf-8').\
        read().strip().split('\n')

    # Split every line into pairs and normalize
    #pairs = [[StrCleaner(s) for s in l.split('<------>')] for l in lines]
    pairs = [[s.lower() for s in l.split('<------>')] for l in lines]

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Preprocessor(lang2)
        output_lang = Preprocessor(lang1)
    else:
        input_lang = Preprocessor(lang1)
        output_lang = Preprocessor(lang2)

    return input_lang, output_lang, pairs

In [4]:
MIN_LENGTH = 3
MAX_LENGTH = 50

def filterPair(p):
    '''
    Filter to get expected pairs with specific length
    '''
    return MIN_LENGTH <= len(p[0].split(' ')) <= MAX_LENGTH and \
        MIN_LENGTH <= len(p[1].split(' ')) < MAX_LENGTH

def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

In [5]:
def prepareData(path, lang1, lang2, reverse=True):
    input_lang, output_lang, pairs = DataReader(path, lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.SentenceAdder(pair[0])
        output_lang.SentenceAdder(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.num)
    print(output_lang.name, output_lang.num)
    return input_lang, output_lang, pairs


src, tgt, pairs = prepareData('data/train.txt', 'english', 'chinese')
src.trim()
tgt.trim()
print(random.choice(pairs))

Reading lines...
Read 90000 sentence pairs
Trimmed to 77083 sentence pairs
Counting words...
Counted words:
chinese 46064
english 30499
chinese:
Total words 46064
After Trimming 46060
Keep Ratio % 99.99131642931573
english:
Total words 30499
After Trimming 30495
Keep Ratio % 99.9868848158956
['新 的 最大 的 乱 源 : " 李扁 " / 伍源 李登辉 正 利用 尚 存 的 政治 影响力 , 与 陈水 扁 结盟 , 力图 主 控 岛内 政局 走向 .', "lee teng - hui [ li denghui ] has been using his remaining political influence to form an alliance with ch ' en shui - bian [ chen shuibian ] in a vigorous attempt to dominate the political course in taiwan ."]


In [38]:
def sentence2idx(preprocessor, sentence):
    '''
    Read sentence and translate into word index plus eos
    '''
    return [SOS_idx] + [preprocessor.w2idx[w] if w in preprocessor.w2idx \
            else UNK_idx for w in sentence.split(' ')] + [EOS_idx]

def pad(seq, max_len):
    '''
    Add padding to sentence with different length
    '''
    seq += [PAD_idx for i in range(max_len - len(seq))]
    return seq

def random_batch(src, tgt, pairs, batch_size, batch_idx):
    '''
    Randomly generate batch data
    '''
    inputs, target = [], []
    
    # Choose batch
    for s in pairs[batch_idx*batch_size:(batch_idx+1)*batch_size]:
        inputs.append(sentence2idx(src, s[0]))
        target.append(sentence2idx(tgt, s[1]))
        
    # Sort by length
    seq_pairs = sorted(zip(inputs, target), key=lambda p: len(p[0]), reverse=True)
    inputs, target = zip(*seq_pairs)
    
    # Obtain length of each sentence and pad
    input_lens = [len(s) for s in inputs]
    input_max = max(input_lens)
    input_padded = [pad(s, input_max) for s in inputs]
    target_lens = [len(s) for s in target]
    target_max = max(target_lens)
    target_padded = [pad(s, target_max) for s in target]

    # Create Variable
    if USE_CUDA:
        input_vars = Variable(torch.LongTensor(input_padded).cuda()).transpose(0, 1)
        input_lens = Variable(torch.LongTensor(input_lens).cuda())
        target_vars = Variable(torch.LongTensor(target_padded).cuda()).transpose(0, 1)
        target_lens = Variable(torch.LongTensor(target_lens).cuda())
    else:
        input_vars = Variable(torch.LongTensor(input_padded)).transpose(0, 1)
        input_lens = Variable(torch.LongTensor(input_lens))
        target_vars = Variable(torch.LongTensor(target_padded)).transpose(0, 1)
        target_lens = Variable(torch.LongTensor(target_lens))

    return input_vars, input_lens, target_vars, target_lens

In [7]:
class Encoder(nn.Module):
    '''
    Define encoder and forward process
    '''
    def __init__(self, dim_input, dim_embed, dim_hidden, num_layers, dropout):
        super(Encoder, self).__init__()
        self.dim_input = dim_input
        self.dim_hidden = dim_hidden
        self.dim_embed = dim_embed
        self.embed = nn.Embedding(dim_input, dim_embed)
        self.cell = nn.GRU(dim_embed, dim_hidden, 
                          num_layers, dropout=dropout, 
                          bidirectional=True)
        
    def init_hidden(self):
        if USE_CUDA:
            return Variable(torch.zeros(self.n_layers, 1, self.hidden_size).cuda())
        else:
            return Variable(torch.zeros(self.n_layers, 1, self.hidden_size))
        
    def forward(self, inputs, inputs_lens, hidden=None):
        '''
        We need to sum the outputs since bi-diretional is used
        '''
        embedded = self.embed(inputs)
        packed = nn.utils.rnn.pack_padded_sequence(embedded, inputs_lens)
        outputs, hidden = self.cell(packed, hidden)
        outputs, output_lengths = nn.utils.rnn.pad_packed_sequence(outputs)
        outputs = outputs[:, :, :self.dim_hidden] + \
                    outputs[:, :, self.dim_hidden:]
        return outputs, hidden


class Attention(nn.Module):
    '''
    Define attention mechanism
    '''
    def __init__(self, dim_hidden):
        super(Attention, self).__init__()
        self.dim_hidden = dim_hidden
        # 2*dim_hidden is needed since bi-direction is used
        self.attn = nn.Linear(2*self.dim_hidden, dim_hidden)
        self.v = nn.Parameter(torch.rand(dim_hidden))
        stdv = 1. / math.sqrt(self.v.size(0))
        self.v.data.uniform_(-stdv, stdv)

    def forward(self, hidden, encoder_outputs):
        timestep = encoder_outputs.size(0)
        h = hidden.repeat(timestep, 1, 1).transpose(0, 1)
        encoder_outputs = encoder_outputs.transpose(0, 1)
        scores = self.score(h, encoder_outputs)
        return F.relu(scores).unsqueeze(1)

    def score(self, hidden, encoder_outputs):
        e = F.softmax(self.attn(torch.cat([hidden, encoder_outputs], 2)),dim=1)
        e = e.transpose(1, 2)
        v = self.v.repeat(encoder_outputs.size(0), 1).unsqueeze(1)
        e = torch.bmm(v, e)
        return e.squeeze(1)


class Decoder(nn.Module):
    '''
    Define decoder with attention
    '''
    def __init__(self, dim_embed, dim_hidden, dim_output, num_layers, dropout):
        super(Decoder, self).__init__()
        self.dim_embed = dim_embed
        self.dim_hidden = dim_hidden
        self.dim_output = dim_output
        self.num_layers = num_layers

        self.embed = nn.Embedding(dim_output, dim_embed)
        self.dropout = nn.Dropout(dropout, inplace=True)
        self.attention = Attention(dim_hidden)
        self.cell = nn.GRU(dim_hidden + dim_embed, dim_hidden,
                          num_layers, dropout=dropout)
        self.out = nn.Linear(2*dim_hidden, dim_output)

    def forward(self, inputs, last_hidden, encoder_outputs):
        
        embedded = self.embed(inputs).unsqueeze(0)  # (1,B,N)
        embedded = self.dropout(embedded)
        
        attn_weights = self.attention(last_hidden[-1], encoder_outputs)
        context = attn_weights.bmm(encoder_outputs.transpose(0, 1))  # (B,1,N)
        context = context.transpose(0, 1)  # (1,B,N)
        
        rnn_input = torch.cat([embedded, context], 2)
        output, hidden = self.cell(rnn_input, last_hidden)
        output = output.squeeze(0)  # (1,B,N) -> (B,N)
        context = context.squeeze(0)
        output = self.out(torch.cat([output, context], 1))
        output = F.log_softmax(output, dim=1)
        return output, hidden, attn_weights

class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, src, src_len, tgt, tgt_len, teacher_forcing_ratio=0.5):
        batch_size = src.size(1)
        max_len = tgt.size(0)
        vocab_size = self.decoder.dim_output
        if USE_CUDA:
            outputs = Variable(torch.zeros(max_len, batch_size, vocab_size).cuda())
        else:
            outputs = Variable(torch.zeros(max_len, batch_size, vocab_size))
        encoder_output, hidden = self.encoder(src, src_len)
        hidden = hidden[:self.decoder.num_layers]
        # Put <sos> at first position
        if USE_CUDA:
            output = Variable(tgt.data[0, :].cuda())
        else:
            output = Variable(tgt.data[0, :])
        for t in range(1, max_len):
            output, hidden, attn_weights = self.decoder(
                    output, hidden, encoder_output)
            outputs[t] = output
            # Randomly choose whether to use teacher force or not
            is_teacher = random.random() < teacher_forcing_ratio
            top1 = output.data.max(1)[1]
            if USE_CUDA:
                output = Variable(tgt.data[t].cuda() if is_teacher else top1.cuda())
            else:
                output = Variable(tgt.data[t] if is_teacher else top1)
        return outputs
    
    def inference(self, src, src_len, max_len = MAX_LENGTH):
        pred_idx = []
        batch_size = src.size(1)
        vocab_size = self.decoder.dim_output
        if USE_CUDA:
            outputs = Variable(torch.zeros(max_len, batch_size, vocab_size).cuda())
        else:
            outputs = Variable(torch.zeros(max_len, batch_size, vocab_size))
        
        encoder_output, hidden = self.encoder(src, src_len)
        hidden = hidden[:self.decoder.num_layers]
        # Put <sos> at first position
        if USE_CUDA:
            output = Variable(src.data[0, :].cuda())
        else:
            output = Variable(src.data[0, :])
        pred_idx.append(SOS_idx)
        for t in range(1, max_len):
            output, hidden, attn_weights = self.decoder(
                    output, hidden, encoder_output)
            outputs[t] = output
            top1 = output.data.max(1)[1]
            pred_idx.append(top1.item())
            if USE_CUDA:
                output = Variable(top1.cuda())
            else:
                output = Variable(top1)
            if top1 == EOS_idx: break
        return outputs, pred_idx

In [8]:
batch_size = 128
hidden_size = 512
embed_size = 256
encoder_n_layers = 2
decoder_n_layers = 1
encoder_test = Encoder(src.num, embed_size, hidden_size, encoder_n_layers, dropout=0.2)
decoder_test = Decoder(embed_size, hidden_size, tgt.num, decoder_n_layers, dropout=0.2)

  "num_layers={}".format(dropout, num_layers))


In [9]:
net = Seq2Seq(encoder_test,decoder_test).cuda()
print(net)

Seq2Seq(
  (encoder): Encoder(
    (embed): Embedding(46064, 256)
    (cell): GRU(256, 512, num_layers=2, dropout=0.2, bidirectional=True)
  )
  (decoder): Decoder(
    (embed): Embedding(30499, 256)
    (dropout): Dropout(p=0.2, inplace)
    (attention): Attention(
      (attn): Linear(in_features=1024, out_features=512, bias=True)
    )
    (cell): GRU(768, 512, dropout=0.2)
    (out): Linear(in_features=1024, out_features=30499, bias=True)
  )
)


In [10]:
opt = optim.Adam(net.parameters(),lr=0.0001)

In [19]:
net.load_state_dict(torch.load('./saved_12_2_1320.pt'))

In [20]:
grad_clip = 5
num_batch = len(pairs) // batch_size
print_every_batches = 50
save_every_batches = 50
pairs.sort(key=lambda x: len(x[0].split()))

for epoch in range(1,50000):
    total_loss = 0
    tmp_loss = 0
    for batch_idx in range(num_batch):
        input_batches, input_lengths,\
            target_batches, target_lengths = random_batch(src,tgt,pairs,batch_size,batch_idx)
        
        opt.zero_grad()
        output = net(input_batches, input_lengths, target_batches, target_lengths)

        #loss = masked_cross_entropy.compute_loss(
        #    output.transpose(0, 1).contiguous(),
        #    target_batches.transpose(0, 1).contiguous(),
        #    target_lengths
        #)
        loss = F.nll_loss(output[1:].view(-1,tgt.num),
                          target_batches[1:].contiguous().view(-1),
                          ignore_index=PAD_idx)
        
        tmp_loss += loss.item()
        if (batch_idx+1) % print_every_batches == 0:
            print("Epoch %d Batch Num %d Loss: %f"%(epoch, batch_idx+1, tmp_loss/print_every_batches))
            tmp_loss = 0
        if (batch_idx+1) % save_every_batches == 0:
            torch.save(net.state_dict(), './saved.pt')
        clip_grad_norm_(net.parameters(), grad_clip)
        loss.backward()
        opt.step()
    print('epoch %d finished !'%(epoch))

Epoch 1 Batch Num 50 Loss: 0.811970
Epoch 1 Batch Num 100 Loss: 1.032570
Epoch 1 Batch Num 150 Loss: 1.141690
Epoch 1 Batch Num 200 Loss: 1.280142
Epoch 1 Batch Num 250 Loss: 1.339801
Epoch 1 Batch Num 300 Loss: 1.492034
Epoch 1 Batch Num 350 Loss: 1.530312
Epoch 1 Batch Num 400 Loss: 1.593064
Epoch 1 Batch Num 450 Loss: 1.756524
Epoch 1 Batch Num 500 Loss: 1.754398
Epoch 1 Batch Num 550 Loss: 1.851925
Epoch 1 Batch Num 600 Loss: 1.904029
epoch 1 finished !
Epoch 2 Batch Num 50 Loss: 0.801169
Epoch 2 Batch Num 100 Loss: 1.009426
Epoch 2 Batch Num 150 Loss: 1.136350
Epoch 2 Batch Num 200 Loss: 1.242602
Epoch 2 Batch Num 250 Loss: 1.334090
Epoch 2 Batch Num 300 Loss: 1.428328
Epoch 2 Batch Num 350 Loss: 1.524001
Epoch 2 Batch Num 400 Loss: 1.639959
Epoch 2 Batch Num 450 Loss: 1.674402
Epoch 2 Batch Num 500 Loss: 1.773530
Epoch 2 Batch Num 550 Loss: 1.796367
Epoch 2 Batch Num 600 Loss: 1.879960
epoch 2 finished !
Epoch 3 Batch Num 50 Loss: 0.780243
Epoch 3 Batch Num 100 Loss: 0.949883
Epo

KeyboardInterrupt: 

# Check Inference result

In [29]:
input_batches, input_lengths,\
    target_batches, target_lengths = random_batch(src,tgt,batch_size,200)

In [36]:
from infer_eval import bleu
for test_idx in range(60):
    _, pred = net.inference(input_batches[:,test_idx].reshape(input_lengths[0].item(),1),input_lengths[0].reshape(1))
    inp = ' '.join([src.idx2w[t] for t in input_batches[:,test_idx].cpu().numpy()])
    mt = ' '.join([tgt.idx2w[t] for t in pred if t!= PAD_idx])
    ref = ' '.join([tgt.idx2w[t] for t in target_batches[:,test_idx].cpu().numpy()])
    print('INPUT:\n' + inp)
    print('REF:\n' + ref)
    print('PREDICTION:\n' + mt)
    print('BLEU = %f' % bleu([mt],[[ref]],4))
    print("------")

INPUT:
<sos> " 恨 哪 ! " <eos>
REF:
<sos> " i hate it ! " <eos> <pad> <pad> <pad> <pad> <pad> <pad>
PREDICTION:
<sos> " i hate it ! " <eos>
BLEU = 0.472367
------
INPUT:
<sos> 敌 动 我 动 . <eos>
REF:
<sos> we moved as the enemy moved . <eos> <pad> <pad> <pad> <pad> <pad>
PREDICTION:
<sos> we moved the enemy as a enemy . <eos>
BLEU = 0.307777
------
INPUT:
<sos> 奥 卿 行 吗 ? <eos>
REF:
<sos> is secretary of state albright competent to be at the position ? <eos>
PREDICTION:
<sos> is the secretary of state albright competent competent to at the position ? <eos>
BLEU = 0.555524
------
INPUT:
<sos> 这 叫 什 麽 ? <eos>
REF:
<sos> what is that called ? <eos> <pad> <pad> <pad> <pad> <pad> <pad> <pad>
PREDICTION:
<sos> what is the called ? <eos>
BLEU = 0.254367
------
INPUT:
<sos> 怎 麽 办 呢 ? <eos>
REF:
<sos> what is to be done then ? <eos> <pad> <pad> <pad> <pad> <pad>
PREDICTION:
<sos> what what is be done to then ? <eos>
BLEU = 0.335160
------
INPUT:
<sos> 远 的 不 说 . <eos>
REF:
<sos> always nothing is said

INPUT:
<sos> " 威胁 " 何在 ? <eos>
REF:
<sos> where is the " threat " ? <eos> <pad> <pad> <pad> <pad> <pad>
PREDICTION:
<sos> where is the " threat " ? <eos>
BLEU = 0.573753
------
INPUT:
<sos> 有 亲人 在 美国 . <eos>
REF:
<sos> he has family members in the united states . <eos> <pad> <pad> <pad>
PREDICTION:
<sos> there has family members in the united states . <eos>
BLEU = 0.614412
------
INPUT:
<sos> 三 是 特色 意识 . <eos>
REF:
<sos> third , the notion of being special . <eos> <pad> <pad> <pad> <pad>
PREDICTION:
<sos> third , the notion of being special characteristics . <eos>
BLEU = 0.564843
------
INPUT:
<sos> 这个 官 不好 当 . <eos>
REF:
<sos> it was not easy to serve in that official position . <eos> <pad>
PREDICTION:
<sos> it was not easy to serve that that it serve . <eos>
BLEU = 0.494875
------
INPUT:
<sos> 久违 了 , 祖国 ! <eos>
REF:
<sos> have not seen you for a long while , motherland ! <eos> <pad>
PREDICTION:
<sos> the long time , you have seen a long time ! <eos>
BLEU = 0.539672
------
INPUT:
<sos

# Check inference result of test data

In [22]:
test_src, test_tgt, test_pairs = prepareData('data/test.txt', 'english', 'chinese')
test_src.trim()
test_tgt.trim()
print(random.choice(test_pairs))

Reading lines...
Read 10000 sentence pairs
Trimmed to 8572 sentence pairs
Counting words...
Counted words:
chinese 17294
english 13049
chinese:
Total words 17294
After Trimming 17290
Keep Ratio % 99.9768705909564
english:
Total words 13049
After Trimming 13045
Keep Ratio % 99.96934631006208
['答 : 国债 投资 成果 显著 , 突出 表现 在 以下 几个 方面 : 一 是 有效地 扩大 了 投资 需求 , 促进 了 经济 发展 .', "a : the state 's investments have been very fruitful especially in the following areas : one , we have effectively expanded investment demand and have promoted economic growth ."]


In [41]:
test_src.w2idx, test_src.idx2w, test_src.num = src.w2idx, src.idx2w, src.num
test_tgt.w2idx, test_tgt.idx2w, test_tgt.num = tgt.w2idx, tgt.idx2w, tgt.num
test_pairs.sort(key=lambda x: len(x[0].split()))

In [47]:
input_batches, input_lengths,\
    target_batches, target_lengths = random_batch(test_src,test_tgt,test_pairs,batch_size,30)

In [48]:
from infer_eval import bleu
for test_idx in range(60):
    _, pred = net.inference(input_batches[:,test_idx].reshape(input_lengths[0].item(),1),input_lengths[0].reshape(1))
    inp = ' '.join([src.idx2w[t] for t in input_batches[:,test_idx].cpu().numpy()])
    mt = ' '.join([tgt.idx2w[t] for t in pred if t!= PAD_idx])
    ref = ' '.join([tgt.idx2w[t] for t in target_batches[:,test_idx].cpu().numpy()])
    print('INPUT:\n' + inp)
    print('REF:\n' + ref)
    print('PREDICTION:\n' + mt)
    print('BLEU = %f' % bleu([mt],[[ref]],4))
    print("------")

INPUT:
<sos> 外界 推测 , 巴拉克 这 一 符合 美国 要求 的 决定 , 可能 旨在 提高 和平谈判 达成 协议 的 机会 . <eos>
REF:
<sos> the outside speculated that barak might have made the decision , which has met the us demand , to gain greater opportunity for reaching an agreement in the peace talks . <eos> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>
PREDICTION:
<sos> in fact that it is an to be the united states to be called for the agreement on the agreement on the the for the agreement . <eos>
BLEU = 0.178477
------
INPUT:
<sos> 同时 , 也 大力 宣传 打击 行贿 犯罪 的 成果 , 让 行贿 者 成为 人人喊打 的 " 过街老鼠 " . <eos>
REF:
<sos> they are also publicizing the results they have accomplished in striking hard on bribery , hoping that all <unk> will become as detestable as " mice on the streets " that everybody wants to remove . <eos> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>
PREDICTION:
<sos> meanwhile , we also also made a special to vigorously keep them ; the people of them ; a ma

INPUT:
<sos> <unk> 以来 , 美国 军方 进行 过 3 导弹 拦截 试验 , 结果 是 1 次 成功 , 两 失败 . <eos>
REF:
<sos> since last autumn , the us military has conducted three tests on intercepting incoming missiles , with one success and two failures . <eos> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>
PREDICTION:
<sos> in the test , the us military conducted the test of the nmd system , the the number of the tests were not just . <eos>
BLEU = 0.051973
------
INPUT:
<sos> 因此 美国 需要 通过 pntr 来 使得 自己 今后 在 中国 市场 上 与 欧盟 和 日本 进行 平等 竞争 . <eos>
REF:
<sos> hence the united states has to pass pntr , so that it will be able to compete with the eu and japan in the china market on an equal footing . <eos> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>
PREDICTION:
<sos> the reason reason , the united states has to see that market market market market economic cooperation and china 's own

INPUT:
<sos> 首先 , 以 学科 专业 建设 为 " 龙头 " , 调整 学科 专业 布局 , 拓宽 规范 专业 口径 . <eos>
REF:
<sos> first , military academies are required to regard the building of different branches of learning as the lead , adjust the arrangement of academic departments , and widen standard academic requirements . <eos> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>
PREDICTION:
<sos> first , some studies , disciplines to scientific studies of " " studies " " " " first , and building building building of specialized studies studies studies studies . <eos>
BLEU = 0.124569
------
INPUT:
<sos> <unk> 说 , 在 这种 赤裸裸 的 两国 论 前提 下 , 两岸 两会 如何 接触 , 对话 , 交流 ? <eos>
REF:
<sos> kuo chun - <unk> ' u wondered how the strait exchange foundation and the association for relations across the taiwan strait could maintain contacts , dialogs and exchanges under such an undisguised " two - state theory . " <eos> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>
PREDICTION:
<sos> zeng said : the the

INPUT:
<sos> 从 历史 上 来看 , 一种 政治 力量 的 衰落 , 往往 首先 是 从 思想上 的 衰落 开始 的 . <eos>
REF:
<sos> from a historic perspective , the decline of a political force often starts first with an ideological decline . <eos> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>
PREDICTION:
<sos> from a political , , the 's political and a national of force and the point of the first of the forces . <eos>
BLEU = 0.098321
------
INPUT:
<sos> 应该说 , 中共 中央 和 中国 政府 的 方针 政策 是 明确 的 , 现在 的 问题 是 贯彻 落实 . <eos>
REF:
<sos> it should be said that the guidelines and policies of the cpc central committee and the chinese government are correct , and the problem now is to implement them . <eos> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>
PREDICTION:
<sos> it is that , the cpc central committee and the central government and the central government that expli