In [1]:
# %load train_nmt.py
from nmt import *
from pprint import pprint
from setup import setup
from data_iterator import TextIterator, prepare_data, prepare_cross

# import argparse

# parser = argparse.ArgumentParser()
# parser.add_argument('-m', type=str, default='fren')
# args = parser.parse_args()

model_options = setup('fren')
pprint(model_options)

# add random seed
model_options['rng']  = numpy.random.RandomState(seed=19920206)
model_options['trng'] = RandomStreams(model_options['rng'].randint(0, 2**32-1))
model_options['n_words_src'] = model_options['voc_sizes'][0]
model_options['n_words'] = model_options['voc_sizes'][1]


# load dictionaries and invert them
worddicts   = [None] * len(model_options['dictionaries'])
worddicts_r = [None] * len(model_options['dictionaries'])
for ii, dd in enumerate(model_options['dictionaries']):
    with open(dd, 'rb') as f:
        worddicts[ii] = pkl.load(f)
    worddicts_r[ii] = dict()
    for kk, vv in worddicts[ii].iteritems():
        worddicts_r[ii][vv] = kk

# reload options
if model_options['reload_'] and os.path.exists(model_options['saveto']):
    print 'Reloading model options'
    with open('%s.pkl' % model_options['saveto'], 'rb') as f:
        model_options = pkl.load(f)

        model_options['overwrite']  = False
        model_options['saveFreq']   = 500
        model_options['sampleFreq'] = 20

@Timeit
def build_networks(options):
    """Build the symbolic graph for both translation directions and compile it.

    Two parameter sets are created (prefixes 'ef_' and 'fe_').  On top of
    them the function builds four forward models and four cross attenders
    over two sentence pairs, (x1, y1) and (x2, y2) -- the inline comments
    label them 'curr' and 'tm' -- mixes the word probabilities with the
    propagated attention through a gate, and compiles the validation,
    cost/update, sampler and one-step attender functions.

    Args:
        options: option mapping.  Keys read directly here: 'reload_',
            'saveto', 'dim', 'trng', 'clip_c' and 'optimizer'.  The mapping
            is also handed to the project helpers (init_params, build_model,
            build_attender, build_sampler, get_layer).

    Returns:
        A pair (funcs, tparams).  `funcs` is a dict holding the compiled
        functions under the keys 'valid', 'cost', 'update', 'init_ef',
        'init_fe', 'next_ef', 'next_fe', 'att_ef', 'att_fe', 'crit_ef' and
        'crit_fe'.  `tparams` is a plain dict merging the 'ef_' and 'fe_'
        shared parameters.
    """
    funcs = dict()

    print 'Building model: E -> F & F -> E model'
    params_ef = init_params(options, 'ef_')
    params_fe = init_params(options, 'fe_')
    print 'Done.'

    # reload parameters
    # Both parameter sets are restored from the same checkpoint file.
    if options['reload_'] and os.path.exists(options['saveto']):
        print 'Reloading model parameters'
        params_ef = load_params(options['saveto'], params_ef)
        params_fe = load_params(options['saveto'], params_fe)

    tparams_ef = init_tparams(params_ef)
    tparams_fe = init_tparams(params_fe)

    # inputs of the model (x1, y1, x2, y2)
    x1 = tensor.matrix('x1', dtype='int64')
    x1_mask = tensor.matrix('x1_mask', dtype='float32')
    y1 = tensor.matrix('y1', dtype='int64')
    y1_mask = tensor.matrix('y1_mask', dtype='float32')
    x2 = tensor.matrix('x2', dtype='int64')
    x2_mask = tensor.matrix('x2_mask', dtype='float32')
    y2 = tensor.matrix('y2', dtype='int64')
    y2_mask = tensor.matrix('y2_mask', dtype='float32')

    # TM reference index
    # One (index, mask) pair per cost term below; the training loop fills
    # them with the output of prepare_cross2.
    tef12 = tensor.matrix('ef12', dtype='int64')
    tef12_mask = tensor.matrix('ef12_mask', dtype='float32')
    tef21 = tensor.matrix('ef21', dtype='int64')
    tef21_mask = tensor.matrix('ef21_mask', dtype='float32')
    tfe12 = tensor.matrix('fe12', dtype='int64')
    tfe12_mask = tensor.matrix('fe12_mask', dtype='float32')
    tfe21 = tensor.matrix('fe21', dtype='int64')
    tfe21_mask = tensor.matrix('fe21_mask', dtype='float32')

    print 'build forward-attention models (4 models simultaneously)...'
    # The 'fe_' models take the same pair with source and target swapped.
    ret_ef11 = build_model(tparams_ef, [x1, x1_mask, y1, y1_mask], options, 'ef_', False, True)   # E->F curr
    ret_fe11 = build_model(tparams_fe, [y1, y1_mask, x1, x1_mask], options, 'fe_', False, False)  # F->E curr
    ret_ef22 = build_model(tparams_ef, [x2, x2_mask, y2, y2_mask], options, 'ef_', False, True)   # E->F tm
    ret_fe22 = build_model(tparams_fe, [y2, y2_mask, x2, x2_mask], options, 'fe_', False, False)  # F->E tm

    print 'build cross-attention models'
    # Each attender combines the decoder states of one pair with the encoder
    # context ('ctx') and source mask of the *other* pair.
    ret_ef12 = build_attender(tparams_ef,
                              [ret_ef11['prev_hids'], ret_ef11['prev_emb'], ret_ef22['ctx'], x2_mask],
                              options, 'ef_')  # E->F curr
    ret_ef21 = build_attender(tparams_ef,
                              [ret_ef22['prev_hids'], ret_ef22['prev_emb'], ret_ef11['ctx'], x1_mask],
                              options, 'ef_')  # E->F tm
    ret_fe12 = build_attender(tparams_fe,
                              [ret_fe11['prev_hids'], ret_fe11['prev_emb'], ret_fe22['ctx'], y2_mask],
                              options, 'fe_')  # F->E curr
    ret_fe21 = build_attender(tparams_fe,
                              [ret_fe22['prev_hids'], ret_fe22['prev_emb'], ret_fe11['ctx'], y1_mask],
                              options, 'fe_')  # F->E tm

    print 'build attentions (forward, cross-propagation)'

    def build_prop(atten_ef, atten_fe):
        """Chain two attention tensors with a batched matrix product.

        The first two axes of both inputs are swapped so that batched_dot
        iterates over the original axis 1, and the product is swapped back.
        Presumably the inputs are (steps, batch, positions) -- TODO confirm
        against build_model / build_attender.
        """
        atten_ef = atten_ef.dimshuffle(1, 0, 2)
        atten_fe = atten_fe.dimshuffle(1, 0, 2)
        attention = tensor.batched_dot(atten_ef, atten_fe).dimshuffle(1, 0, 2)
        return attention

    # Cross attention of one direction composed with the forward attention
    # of the opposite direction on the other sentence pair.
    att_ef12 = build_prop(ret_ef12['attention'], ret_fe22['attention'])
    att_ef21 = build_prop(ret_ef21['attention'], ret_fe11['attention'])
    att_fe12 = build_prop(ret_fe12['attention'], ret_ef22['attention'])
    att_fe21 = build_prop(ret_fe21['attention'], ret_ef11['attention'])

    print 'build gates!'
    # NOTE(review): the gate parameters are created here but are neither used
    # in the cost below nor added to `tparams` (see the commented-out merge
    # further down), so they are never trained or returned.
    params_gate  = OrderedDict()
    params_gate  = get_layer('bi')[0](options, params_gate, nin=2 * options['dim'])
    tparams_gate = init_tparams(params_gate)

    # a neural gate which is the relatedness of two attentions.
    # def build_gate(ctx1, ctx2):
    #     return get_layer('bi')[1](tparams_gate, ctx1, ctx2)
    #
    # gate_ef1 = 1 - build_gate(ret_ef11['ctxs'], ret_ef12['ctxs'])
    # gate_ef2 = 1 - build_gate(ret_ef22['ctxs'], ret_ef21['ctxs'])
    # gate_fe1 = 1 - build_gate(ret_fe11['ctxs'], ret_fe12['ctxs'])
    # gate_fe2 = 1 - build_gate(ret_fe22['ctxs'], ret_fe21['ctxs'])
    #
    # print 'Building Gate functions, ...',
    # f_gate = theano.function([ret_ef11['ctxs'], ret_ef12['ctxs']],
    #                           gate_ef1, profile=profile)
    # print 'Done.'

    print 'Building a Natural Gate Function'
    # Parameter-free gate: one minus the ratio of the cross-attention sum to
    # the forward-attention sum, with the ratio clipped to [0, 1].
    # NOTE(review): the denominator has no epsilon; a zero 'att_sum' would
    # yield inf/NaN -- confirm it cannot be zero.
    gate_ef1 = 1 - tensor.clip(ret_ef12['att_sum'] / (ret_ef11['att_sum']), 0, 1)
    gate_ef2 = 1 - tensor.clip(ret_ef21['att_sum'] / (ret_ef22['att_sum']), 0, 1)
    gate_fe1 = 1 - tensor.clip(ret_fe12['att_sum'] / (ret_fe11['att_sum']), 0, 1)
    gate_fe2 = 1 - tensor.clip(ret_fe21['att_sum'] / (ret_fe22['att_sum']), 0, 1)

    print 'build loss function (w/o gate)'

    # get the loss function
    def compute_prob(probs, y, y_mask):
        """Pick, for every entry of `y`, the probability `probs` assigns to it.

        `probs` is flattened and indexed with row * n_words + y, where
        n_words is the size of its last axis; the result is reshaped to the
        shape of `y` and multiplied by `y_mask`.
        """

        # compute the loss for the vocabulary-selection side
        y_flat  = y.flatten()
        n_words = probs.shape[-1]
        y_flat_idx = tensor.arange(y_flat.shape[0]) * n_words + y_flat
        probw   = probs.flatten()[y_flat_idx]
        probw   = probw.reshape([y.shape[0], y.shape[1]]) * y_mask
        return probw

    prob_ef11 = ret_ef11['probs']
    prob_ef22 = ret_ef22['probs']
    prob_fe11 = ret_fe11['probs']
    prob_fe22 = ret_fe22['probs']

    def compute_cost(prob, y, y_mask, att, t, t_mask, g):
        """Negative log of the gated mixture of word and attention probability.

        The word probability is weighted by `g` and the probability read
        from `att` at index `t` by (1 - g).  The result is summed over
        axis 0.
        """
        # Entries where y equals 1 (the id the sampler treats as UNK) and
        # t_mask is set are removed from y_mask, so only the attention term
        # contributes there.
        _y = tensor.eq(y, 1)
        y_mask *= ((1 - _y) + _y * (1 - t_mask))
        # 1e-7 keeps the logarithm finite when both terms are zero.
        ccost = -tensor.log(compute_prob(prob, y, y_mask) * g +
                            compute_prob(att, t, t_mask) * (1 - g) +
                            1e-7)
        # Keep a position when either mask is set.
        ccost = (ccost * (1 - (1 - y_mask) * (1 - t_mask))).sum(0)
        return ccost

    # get cost
    cost_ef1 = compute_cost(prob_ef11, y1, y1_mask, att_ef12, tef12, tef12_mask, gate_ef1)
    cost_ef2 = compute_cost(prob_ef22, y2, y2_mask, att_ef21, tef21, tef21_mask, gate_ef2)
    cost_fe1 = compute_cost(prob_fe11, x1, x1_mask, att_fe12, tfe12, tfe12_mask, gate_fe1)
    cost_fe2 = compute_cost(prob_fe22, x2, x2_mask, att_fe21, tfe21, tfe21_mask, gate_fe2)

    cost = cost_ef1 + cost_ef2 + cost_fe1 + cost_fe2

    print 'build sampler (one-step)'
    f_init_ef, f_next_ef = build_sampler(tparams_ef, options, options['trng'], 'ef_')
    f_init_fe, f_next_fe = build_sampler(tparams_fe, options, options['trng'], 'fe_')

    print 'build attender (one-step)'
    f_attend_ef = build_attender(tparams_ef, None, options, 'ef_', one_step=True)  # E->F curr
    f_attend_fe = build_attender(tparams_fe, None, options, 'fe_', one_step=True)

    # before any regularizer
    print 'build Cost Function...',
    # The order of `inputs` is the positional order the training loop must
    # use when calling funcs['valid'] and funcs['cost'].
    inputs = [x1, x1_mask, y1, y1_mask, x2, x2_mask, y2, y2_mask,
              tef12, tef12_mask, tef21, tef21_mask,
              tfe12, tfe12_mask, tfe21, tfe21_mask]
    # f_valid returns the cost before it is averaged below.
    f_valid = theano.function(inputs, cost, profile=profile)

    print 'build Gradient (backward)...',
    cost    = cost.mean()
    # tparams = dict(tparams_ef.items() + tparams_fe.items() + tparams_gate.items())
    tparams = dict(tparams_ef.items() + tparams_fe.items())
    grads   = clip(tensor.grad(cost, wrt=itemlist(tparams)), options['clip_c'])
    print 'Done'

    # compile the optimizer, the actual computational graph is compiled here
    lr = tensor.scalar(name='lr')
    print 'Building Optimizers...',
    # NOTE(review): the optimizer is looked up with eval() on an option
    # string; this is only acceptable while the options come from a trusted
    # source.
    f_cost, f_update = eval(options['optimizer'])(lr, tparams, grads, inputs, cost)

    print 'Done'

    # put everything into function lists
    funcs['valid']  = f_valid
    funcs['cost']   = f_cost
    funcs['update'] = f_update

    funcs['init_ef'] = f_init_ef
    funcs['init_fe'] = f_init_fe
    funcs['next_ef'] = f_next_ef
    funcs['next_fe'] = f_next_fe

    funcs['att_ef']  = f_attend_ef
    funcs['att_fe']  = f_attend_fe

    # NOTE(review): both critics come from 'ef_' models (ret_ef11 and
    # ret_ef22); 'crit_fe' is not taken from an 'fe_' model -- confirm this
    # is intended.
    funcs['crit_ef'] = ret_ef11['f_critic']
    funcs['crit_fe'] = ret_ef22['f_critic']

    # funcs['gate']    = f_gate

    print 'Build Networks... done!'
    return funcs, tparams

funcs, tparams = build_networks(model_options)

# print 'save the compiled functions/tparams for temperal usage'


print 'Loading data'
train = TextIterator(model_options['datasets'], model_options['dictionaries'], [0, 0, 0, 0],
                     batch_size=model_options['batch_size'], maxlen=model_options['maxlen'])
valid = TextIterator(model_options['valid_datasets'], model_options['dictionaries'], [0, 0, 0, 0],
                     batch_size=model_options['batch_size'], maxlen=200)

if model_options['use_pretrain']:
    print 'use the pretrained NMT-models...',
    params = unzip(tparams)
    params = load_params2(model_options['baseline_ef'], params, mode='ef_')
    params = load_params2(model_options['baseline_fe'], params, mode='fe_')
    zipp(params, tparams)
    print 'Done.'

else:
    print 'not loading the pretrained baseline'

print '-------------------------------------------- Main-Loop -------------------------------------------------'



Using gpu device 0: GeForce GTX 1080 (CNMeM is disabled, cuDNN 5105)


{'baseline_ef': '/root/disk/scratch/model-tmnmt/baseline_enfr.bs64.npz',
 'baseline_fe': '/root/disk/scratch/model-tmnmt/baseline_fren.npz',
 'batch_size': 16,
 'beamsize': 5,
 'clip_c': 1.0,
 'd_maxlen': 200,
 'datasets': ['/root/workspace/TMNMT/.dataset/fren/train.fr.tok.shuf',
              '/root/workspace/TMNMT/.dataset/fren/train.en.tok.shuf',
              '/root/workspace/TMNMT/.dataset/fren/train.fr.tok.shuf',
              '/root/workspace/TMNMT/.dataset/fren/train.en.tok.shuf'],
 'decay_c': 0.0,
 'decoder': 'gru_cond',
 'dictionaries': ['/root/workspace/TMNMT/.dataset/fren/train.fr.tok.pkl',
                  '/root/workspace/TMNMT/.dataset/fren/train.en.tok.pkl',
                  '/root/workspace/TMNMT/.dataset/fren/train.fr.tok.pkl',
                  '/root/workspace/TMNMT/.dataset/fren/train.en.tok.pkl'],
 'dim': 1024,
 'dim_word': 512,
 'dispFreq': 10,
 'encoder': 'gru',
 'lrate': 2e-05,
 'maxlen': 50,
 'normalize': False,
 'optimizer': 'adam',
 'overwrite': True,
 'pa

In [27]:
def _merge_copy_probs(next_p, copy_p, y2, unk_id=1):
    """Fold the copy distribution into the vocabulary distribution.

    Returns a new array made of `next_p` followed by `copy_p` along axis 1.
    For every position j whose token in `y2` is not `unk_id`, the copy mass
    is added to that token's vocabulary column and the copy slot is cleared;
    positions holding `unk_id` keep their own slot.  The vocabulary column
    `unk_id` is zeroed so that UNK is never produced.
    """
    lmax = next_p.shape[1]
    merged = numpy.concatenate([next_p, copy_p], axis=1)
    tm_tokens = numpy.asarray(y2).reshape(-1)
    for j in range(copy_p.shape[1]):
        token = int(tm_tokens[j])
        if token != unk_id:
            merged[:, token] += copy_p[:, j]
            merged[:, lmax + j] = 0.
    merged[:, unk_id] = 0.  # never output UNK
    return merged


def get_sample(tparams,
               funcs,
               x1, x2, y2,
               options,
               rng=None,
               m=0,
               k=1,  # beam-size
               maxlen=200,
               stochastic=True,
               argmax=False):
    """Decode one sentence, mixing generated and copied word probabilities.

    At every step the one-step attender over the second source sentence
    yields a copy distribution over the positions of `y2`, the one-step
    sampler yields a vocabulary distribution, and both are mixed with the
    gate clip(mattsum / attsum, 0, 1) before a word is chosen.

    Changes with respect to the previous version:
      * the chosen word is always fed back to the decoder (the multinomial
        branch used to leave the word returned by the sampler in place);
      * a choice that points into the copy slots (index >= vocabulary size)
        is fed back as UNK (id 1) instead of an out-of-vocabulary id;
      * the distribution passed to `rng.multinomial` is renormalised,
        because numpy assigns any missing mass to the last entry.

    Args:
        tparams, options: unused here, kept for interface compatibility.
        funcs: dict of callables with the keys 'init_<mode>', 'next_<mode>',
            'att_<mode>' and 'crit_<other mode>'.
        x1: source sentence to translate.
        x2, y2: second sentence pair whose target words may be copied;
            `y2` must hold a single sequence.
        rng: object with a numpy-style `multinomial`; required when
            `stochastic` is true and `argmax` is false.
        m: index into ['ef', 'fe'] selecting the direction.
        k: beam size; values above 1 require `stochastic` to be false.
        maxlen: maximum number of decoding steps.
        stochastic: when false no word is selected (beam search is not
            implemented) and empty lists are returned.
        argmax: take the most probable word instead of sampling.

    Returns:
        (sample, sample_score, action): the chosen indices (indices at or
        above the vocabulary size denote copy slots), the accumulated
        negative log-probability, and the gate value of every step.
    """
    unk_id = 1

    # modes
    modes = ['ef', 'fe']
    mode, other_mode = modes[m], modes[1 - m]

    # masks
    x2_mask = numpy.array(x2 > 0, dtype='float32')
    y2_mask = numpy.array(y2 > 0, dtype='float32')

    # k is the beam size we have
    if k > 1:
        assert not stochastic, 'Beam search does not support stochastic sampling'

    sample = []
    action = []
    sample_score = 0 if stochastic else []

    live_k = 1

    # get initial state of decoder rnn and encoder context for x1
    ret = funcs['init_' + mode](x1)
    next_state, ctx0 = ret[0], ret[1]  # init-state, contexts
    next_w = -1 * numpy.ones((1,)).astype('int64')  # bos indicator

    # get translation memory encoder context
    _, mctx0 = funcs['init_' + mode](x2)

    # get attention propagation for translation memory
    attpipe, _ = funcs['crit_' + other_mode](y2, y2_mask, x2, x2_mask)
    attpipe = numpy.squeeze(attpipe)

    # live_k never changes, so the tiled contexts are loop invariant.
    ctx = numpy.tile(ctx0, [live_k, 1])
    mctx = numpy.tile(mctx0, [live_k, 1])

    # NOTE: with stochastic=False the loop still runs `maxlen` steps but
    # selects nothing, exactly as before.
    for _step in range(maxlen):
        # --copy mode
        ret = funcs['att_' + mode](next_state, next_w, mctx)
        matt, mattsum = ret[1], ret[2]    # matt: batchsize x len_x2
        copy_p = numpy.dot(matt, attpipe)  # batchsize x len_y2

        # --generate mode
        ret = funcs['next_' + mode](next_w, ctx, next_state)
        next_p, next_w, next_state, attsum = ret[0], ret[1], ret[2], ret[4]

        # compute gate
        gates = numpy.clip(mattsum / attsum, 0, 1)

        # real probabilities
        next_p *= (1 - gates[:, None])
        copy_p *= gates[:, None]

        vocab_size = next_p.shape[1]
        merge_p = _merge_copy_probs(next_p, copy_p, y2, unk_id)

        if stochastic:
            if argmax:
                nw = merge_p[0].argmax()
            else:
                # Zeroing UNK leaves the row un-normalised; renormalise so
                # the missing mass is not silently given to the last entry.
                pvals = merge_p[0].astype('float64')
                total = pvals.sum()
                if total > 0:
                    pvals /= total
                nw = rng.multinomial(1, pvals=pvals).argmax()

            # Condition the next step on the word that was actually chosen;
            # copy slots have no embedding, so they are fed back as UNK.
            next_w[0] = nw if nw < vocab_size else unk_id

            sample.append(nw)
            action.append(gates[0])
            sample_score -= numpy.log(merge_p[0, nw])
            if nw == 0:
                break

    return sample, sample_score, action

In [31]:
print '-------------------------------------------- Main-Loop -------------------------------------------------'

# ------------------ initlization --------------- #
best_p       = None
bad_counter  = 0
uidx         = 0
estop        = False
history_errs = []
max_epochs   = 100
finish_after = 10000000

lrate        = model_options['lrate']
saveFreq     = model_options['saveFreq']
sampleFreq   = model_options['sampleFreq']
validFreq    = model_options['validFreq']
saveto       = model_options['saveto']
overwrite    = model_options['overwrite']

# ----------------------------------------------- #

# reload history
if model_options['reload_'] and os.path.exists(model_options['saveto']):
    rmodel = numpy.load(model_options['saveto'])
    history_errs = list(rmodel['history_errs'])
    if 'uidx' in rmodel:
        uidx = rmodel['uidx']


# idx back to sequences
def idx2seq(x, ii):
    """Turn a sequence of word ids into a space-separated string.

    Ids are looked up in the inverted dictionary `worddicts_r[ii]`; ids
    missing from it are rendered as 'UNK'.  Conversion stops at the first
    id equal to 0.
    """
    words = []
    for token in x:
        if token == 0:
            break
        words.append(worddicts_r[ii].get(token, 'UNK'))
    return ' '.join(words)


# compute-update
@Timeit
def execute(inps, lrate, info):
    eidx, uidx = info
    cost = funcs['cost'](*inps)

    # check for bad numbers, usually we remove non-finite elements
    # and continue training - but not done here
    if numpy.isnan(cost) or numpy.isinf(cost):
        print 'NaN detected'
        sys.exit(-1)

    funcs['update'](lrate)
    print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost,
    return cost

def prepare_cross2(seqs_x1, seqs_x2, maxlen_x1):
    """Align each token of the x1 sequences with an equal token in x2.

    For sample n and position j of `seqs_x1[n]`, the positions of
    `seqs_x2[n]` holding the same token are collected and the one closest
    to j is kept (the earliest one on ties).

    Returns:
        (t, t_mask), both of shape (maxlen_x1, number of samples):
        `t` (int64) holds the chosen x2 position, or 0 when nothing
        matches; `t_mask` (float32) is 1 where a match exists.
    """
    batch = len(seqs_x1)
    shape = (maxlen_x1, batch)
    t = numpy.zeros(shape, dtype='int64')
    t_mask = numpy.zeros(shape, dtype='float32')

    for col, (cur_seq, ref_seq) in enumerate(zip(seqs_x1, seqs_x2)):
        for row, token in enumerate(cur_seq):
            candidates = [pos for pos, other in enumerate(ref_seq) if token == other]
            if not candidates:
                continue
            # min() keeps the first of several equally distant positions.
            t[row, col] = min(candidates, key=lambda pos: abs(pos - row))
            t_mask[row, col] = 1.

    return t, t_mask

# start!!
print 'Loading data'
train = TextIterator(model_options['datasets'], model_options['dictionaries'], [0, 0, 0, 0],
                     batch_size=model_options['batch_size'], maxlen=model_options['maxlen'])
valid = TextIterator(model_options['valid_datasets'], model_options['dictionaries'], [0, 0, 0, 0],
                     batch_size=model_options['batch_size'], maxlen=200)


max_epochs = 1
for eidx in xrange(max_epochs):
    n_samples = 0

    for k, (sx1, sy1, sx2, sy2) in enumerate(train):
        uidx += 1
        if uidx < 78:
            continue
            
        x1, x1_mask = prepare_data(sx1, model_options['maxlen'], model_options['voc_sizes'][0])
        y1, y1_mask = prepare_data(sy1, model_options['maxlen'], model_options['voc_sizes'][1])
        x2, x2_mask = prepare_data(sx2, model_options['maxlen'], model_options['voc_sizes'][2])
        y2, y2_mask = prepare_data(sy2, model_options['maxlen'], model_options['voc_sizes'][3])

        tx12, tx12_mask = prepare_cross2(sx1, sx2, x1.shape[0])
        tx21, tx21_mask = prepare_cross2(sx2, sx1, x2.shape[0])
        ty12, ty12_mask = prepare_cross2(sy1, sy2, y1.shape[0])
        ty21, ty21_mask = prepare_cross2(sy1, sy2, y2.shape[0])

        inps = [x1, x1_mask, y1, y1_mask,
                x2, x2_mask, y2, y2_mask,
                ty12, ty12_mask, ty21, ty21_mask,
                tx12, tx12_mask, tx21, tx21_mask]

        execute(inps, lrate, [eidx, uidx])
       
#         for jj in xrange(numpy.minimum(5, x1.shape[1])):
#             stochastic = True
#             sample, sc, acts = get_sample(tparams, funcs,
#                                        x1[:, jj][:, None],
#                                        x2[:, jj][:, None],
#                                        y2[:, jj][:, None],
#                                        model_options,
#                                        rng=model_options['rng'],
#                                        m=1,
#                                        k=1,
#                                        maxlen=200,
#                                        stochastic=model_options['stochastic'],
#                                        argmax=True)

#             print 'Source-CR {}: {}'.format(jj, idx2seq(sx1[jj], 0))
#             print 'Target-CR {}: {}'.format(jj, idx2seq(sy1[jj], 1))
#             print '-----------------------------'
#             print 'Source-TM {}: {}'.format(jj, idx2seq(sx2[jj], 2))
#             print 'Target-TM {}: {}'.format(jj, idx2seq(sy2[jj], 3))
#             print '============================='

#             if model_options['stochastic']:
#                 ss = sample
#             else:
#                 sc /= numpy.array([len(s) for s in sample])
#                 ss = sample[sc.argmin()]

#             _ss = []
#             for ii, si in enumerate(ss):
#                 if si < model_options['voc_sizes'][1]:
#                     _ss.append(si)
#                 else:
#                     print si
#                     offset = si - model_options['voc_sizes'][1]
#                     _ss.append(sy2[jj][offset])

#             print 'Sample-CR {}: {}'.format(jj, idx2seq(_ss, 1))
#             print 'Copy Prob {}: {}'.format(jj, ' '.join(['{:.2f}'.format(a) for a in acts]))
#             print

#             import sys; sys.exit(123)
#             break
        
        
#     print 'Seen %d samples' % n_samples

#     if estop:
#         break

# if best_p is not None:
#     zipp(best_p, tparams)

# valid_err = validate(funcs, model_options, valid).mean()
# print 'Valid ', valid_err

# params = copy.copy(best_p)
# numpy.savez(saveto, zipped_params=best_p,
#             history_errs=history_errs,
#             uidx=uidx,
#             **params)




-------------------------------------------- Main-Loop -------------------------------------------------
Loading data
Epoch  0 Update  78 Cost  105.951843262 execute: elapsed 0.5847 secs.

Epoch  0 Update  79 Cost  91.9244689941 execute: elapsed 0.5153 secs.

Epoch  0 Update  80 Cost  554.106994629 execute: elapsed 2.2140 secs.

Epoch  0 Update  81 Cost  563.347167969 execute: elapsed 1.9617 secs.

Epoch  0 Update  82 Cost  502.607177734 execute: elapsed 1.6864 secs.

Epoch  0 Update  83 Cost  413.75692749 execute: elapsed 1.5985 secs.

Epoch  0 Update  84 Cost  360.311828613 execute: elapsed 1.3470 secs.

Epoch  0 Update  85 Cost  306.466339111 execute: elapsed 1.2228 secs.

Epoch  0 Update  86 Cost  260.278106689 execute: elapsed 1.1624 secs.

Epoch  0 Update  87 Cost  246.771148682 execute: elapsed 1.0160 secs.

Epoch  0 Update  88 Cost  201.967681885 execute: elapsed 0.8447 secs.

Epoch  0 Update  89 Cost  168.376983643 execute: elapsed 0.8281 secs.

Epoch  0 Update  90 Cost  134.7

KeyboardInterrupt: 

Gate=1
-------------------------------------------- Main-Loop -------------------------------------------------
Loading data
Source-CR 0: Pour la réexportation des marchandises non communautaires qui ne sont pas déchargées ou qui sont transbordées au sens de l &apos; article 176 paragraphe 2 du code , la notification visée à l &apos; article 182 paragraphe 3 du code n &apos; est pas nécessaire . &quot;
Target-CR 0: In the case of the re-exportation of non-Community goods , which are not unloaded or which are transhipped within the meaning of Article 176 ( 2 ) of the Code , the notification referred to in Article 182 ( 3 ) of the Code shall not be required . &apos;
-----------------------------
Source-TM 0: Pour la réexportation des marchandises non communautaires qui ne sont pas déchargées ou qui sont transbordées au sens de l &apos; article 176 paragraphe 2 du code , la notification visée à l &apos; article 182 paragraphe 3 du code n &apos; est pas nécessaire . &quot;
Target-TM 0: In the case of the re-exportation of non-Community goods , which are not unloaded or which are transhipped within the meaning of Article 176 ( 2 ) of the Code , the notification referred to in Article 182 ( 3 ) of the Code shall not be required . &apos;
=============================
Sample-CR 0: In In re-exportation of non-Community goods , , , , , , , notification within Article 182 ( 2 3 2 of Code , notification referred in Article 182 ( 3 3 of Code shall not required . &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos; &apos;
Copy Prob 0: 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00

Gate = 0
-------------------------------------------- Main-Loop -------------------------------------------------
Loading data
Source-CR 0: Pour la réexportation des marchandises non communautaires qui ne sont pas déchargées ou qui sont transbordées au sens de l &apos; article 176 paragraphe 2 du code , la notification visée à l &apos; article 182 paragraphe 3 du code n &apos; est pas nécessaire . &quot;
Target-CR 0: In the case of the re-exportation of non-Community goods , which are not unloaded or which are transhipped within the meaning of Article 176 ( 2 ) of the Code , the notification referred to in Article 182 ( 3 ) of the Code shall not be required . &apos;
-----------------------------
Source-TM 0: Pour la réexportation des marchandises non communautaires qui ne sont pas déchargées ou qui sont transbordées au sens de l &apos; article 176 paragraphe 2 du code , la notification visée à l &apos; article 182 paragraphe 3 du code n &apos; est pas nécessaire . &quot;
Target-TM 0: In the case of the re-exportation of non-Community goods , which are not unloaded or which are transhipped within the meaning of Article 176 ( 2 ) of the Code , the notification referred to in Article 182 ( 3 ) of the Code shall not be required . &apos;
=============================
Sample-CR 0: For the re-exportation of non-Community goods which are not unloaded or transhipped within the meaning of Article 176 ( 2 ) of the Code , the notification provided for in Article 182 ( 3 ) of the Code shall be required . &apos; ;
Copy Prob 0: 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00

An exception has occurred, use %tb to see the full traceback.

Gate = Natural
-------------------------------------------- Main-Loop -------------------------------------------------
Loading data
Source-CR 0: Pour la réexportation des marchandises non communautaires qui ne sont pas déchargées ou qui sont transbordées au sens de l &apos; article 176 paragraphe 2 du code , la notification visée à l &apos; article 182 paragraphe 3 du code n &apos; est pas nécessaire . &quot;
Target-CR 0: In the case of the re-exportation of non-Community goods , which are not unloaded or which are transhipped within the meaning of Article 176 ( 2 ) of the Code , the notification referred to in Article 182 ( 3 ) of the Code shall not be required . &apos;
-----------------------------
Source-TM 0: Pour la réexportation des marchandises non communautaires qui ne sont pas déchargées ou qui sont transbordées au sens de l &apos; article 176 paragraphe 2 du code , la notification visée à l &apos; article 182 paragraphe 3 du code n &apos; est pas nécessaire . &quot;
Target-TM 0: In the case of the re-exportation of non-Community goods , which are not unloaded or which are transhipped within the meaning of Article 176 ( 2 ) of the Code , the notification referred to in Article 182 ( 3 ) of the Code shall not be required . &apos;
=============================
Sample-CR 0: In the case of re-exportation of non-Community goods , which are not unloaded or transhipped within the meaning of Article 176 ( 2 ) of the Code , the notification referred to in Article 182 ( 3 ) of the Code shall be required . &apos; ;
Copy Prob 0: 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50



In [17]:
print x1[:, 1], x1[:, 1].shape
print y1[:, 1], y1[:, 1].shape
print ty12[:, 1]
print ty12_mask[:, 1]
print sy1[1]

[  17 1756   12  116  163    2 3814   61    3   18   87    2 6001    2 2139
    4    2 2139   26    2 1581    2    8    3  948    4   25  145  764 2517
   50  874    4   81    9    8    3   30  131    4  365  152   11  140    4
   15   39    2  124    7    0] (51,)
[   9 1693    8  181   62   15 2043   30   11    2  674    3   14 2825   20
 1848    3    2  482   20   14 2895 2825    4   50  758  404   54   15  839
    4  217    7   22  122    9  149    8   10    9  127    8    3    2  437
   32    6    0    0    0    0] (51,)
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46  0  0  0
  0]
[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  0.  0.  0.  0.]
[9, 1693, 8, 181, 62, 15, 2043, 30, 11, 2, 674, 3, 14, 2825, 20, 1848, 3, 2, 482, 20, 14, 2895, 2825, 4