In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function

import collections
import logging
import math
import os
import random
import string
import time
import zipfile

import numpy as np
from six.moves import range
import tensorflow as tf

import data_utils
import seq2seq_model

In [2]:
_buckets = [(21, 25), (31, 35)]
max_word_length = 21
vocabulary_size = len(string.ascii_lowercase) + 5 # [a-z] + ' ' + PAD + GO + EOS + UNK
print(vocabulary_size)
first_letter = ord(string.ascii_lowercase[0])

def char2id(char):
  """Map a single character to its vocabulary id.

  Lowercase ASCII letters map to 5..30 and the space maps to 4; ids 0~3 are
  used by data_utils.

  Args:
    char: a one-character string.

  Returns:
    The integer id, or 0 (after printing a notice) for anything that is not
    exactly one lowercase ASCII letter or a single space.
  """
  # `char in string.ascii_lowercase` alone is a substring test: '' and runs
  # such as 'ab' would pass it and then blow up in ord(). Require exactly one
  # character so those inputs reach the "unexpected" fallback instead.
  if len(char) == 1 and char in string.ascii_lowercase:
    # ascii_lowercase is alphabetical, so the index is the offset from 'a'.
    return string.ascii_lowercase.index(char) + 5 # 0~3 are used by data_utils
  elif char == ' ':
    return 4
  else:
    print('Unexpected character: %s' % char)
    return 0
  
def id2char(dictid):
  """Map a vocabulary id back to its character.

  Ids 5..30 are the lowercase ASCII letters and 4 is the space.

  Args:
    dictid: integer vocabulary id.

  Returns:
    The one-character string for a letter or space id; '' (after printing a
    notice) for every other id.
  """
  letter_index = dictid - 5
  # Bound the letter range on both sides: without the upper bound an id past
  # 'z' would silently come back as punctuation (31 -> '{') rather than being
  # reported as unexpected.
  if 0 <= letter_index < len(string.ascii_lowercase):
    return string.ascii_lowercase[letter_index]
  elif dictid == 4:
    return ' '
  else:
    print('Unexpected id: %d' % dictid)
    return ''

def random_character(low_range, high_range):
    """Return the character for a random id drawn from [low_range, high_range].

    Both bounds are inclusive (random.randint); the id is converted with
    id2char.
    """
    drawn_id = random.randint(low_range, high_range)
    return id2char(drawn_id)

def generate_seq(max_word_length, seq_min_length, seq_max_length):
    """Generate a random string of lowercase letters and spaces.

    Args:
      max_word_length: a letter that would make the current word longer than
        this is replaced by a space.
      seq_min_length: smallest possible length of the result (inclusive).
      seq_max_length: largest possible length of the result (inclusive).

    Returns:
      A string whose length is drawn uniformly from
      [seq_min_length, seq_max_length].
    """
    pieces = []
    letters_in_word = 0
    total_length = random.randint(seq_min_length, seq_max_length)
    space_id = char2id(' ')
    last_letter_id = char2id('z')
    previous = None
    for _ in range(total_length):
        current = random_character(space_id, last_letter_id)
        # Right after a space, redraw until the character is not a space.
        if previous == ' ':
            while current == ' ':
                current = random_character(space_id, last_letter_id)
        if current != ' ':
            letters_in_word += 1
            if letters_in_word > max_word_length:
                # Word is full: end it here with a space instead of the letter.
                current = ' '
                letters_in_word = 0
        else:
            letters_in_word = 0
        pieces.append(current)
        previous = current
    return ''.join(pieces)

def reverse_word(word):
    """Return `word` with its characters in reverse order."""
    return word[::-1]

def generate_seq_labels(seq):
    """Build the target string for `seq`: each word reversed, spaces kept.

    Words are the runs of characters between single spaces. Every space stays
    at its original position, so the result always has the same length as
    `seq`.

    Args:
      seq: string of words separated by spaces; may start or end with spaces.

    Returns:
      `seq` with the characters of every word reversed.
    """
    # Split on the literal space rather than on arbitrary whitespace: this
    # yields an empty "word" for each leading, trailing or repeated space, so
    # the join puts every space back exactly where it was. (A whitespace split
    # plus re-attached padding doubles an all-space input and collapses
    # repeated spaces.)
    return ' '.join(reverse_word(word) for word in seq.split(' '))

def generate_data(max_size=None):
    """Generate random (source, target) id sequences and sort them into buckets.

    Each source is a random string from generate_seq (length 10 to 30) and its
    target is generate_seq_labels(source); both are converted to ids with
    char2id and data_utils.EOS_ID is appended to the target.

    Args:
      max_size: integer number of pairs to generate. Required in practice: the
        data is synthetic, so there is no end of input that could stop
        generation on its own.

    Returns:
      A list with one entry per bucket in _buckets. Entry n is a list of
      [source_ids, target_ids] pairs that fit bucket n, i.e. whose lengths are
      strictly below that bucket's (source size, target size) and that fit no
      earlier bucket. Pairs that fit no bucket are dropped.

    Raises:
      ValueError: if max_size is None or 0, which would otherwise make the
        function generate data forever.
    """
    if not max_size:
        raise ValueError(
            'generate_data needs a non-zero max_size; the data is generated '
            'on the fly, so without a limit generation would never stop')
    seq_min_length = 10
    seq_max_length = 30
    data_set = [[] for _ in _buckets]
    for _ in range(max_size):
        source = generate_seq(max_word_length, seq_min_length, seq_max_length)
        target = generate_seq_labels(source)
        source_ids = [char2id(x) for x in source]
        target_ids = [char2id(x) for x in target]
        target_ids.append(data_utils.EOS_ID)
        # Buckets are tried in order; the pair goes into the first one it fits.
        for bucket_id, (source_size, target_size) in enumerate(_buckets):
            if len(source_ids) < source_size and len(target_ids) < target_size:
                data_set[bucket_id].append([source_ids, target_ids])
                break
    return data_set

# Quick look at the data format: generate 10 samples and print the buckets.
print(generate_data(10))


31
[[[[11, 18, 7, 11, 10, 24, 25, 16, 19, 29, 9, 20, 16, 17, 18, 7, 21, 12, 11, 18], [18, 11, 12, 21, 7, 18, 17, 16, 20, 9, 29, 19, 16, 25, 24, 10, 11, 7, 18, 11, 2]], [[30, 20, 14, 22, 10, 7, 23, 9, 17, 4, 14, 17, 24, 23, 10, 23, 8, 13, 24], [17, 9, 23, 7, 10, 22, 14, 20, 30, 4, 24, 13, 8, 23, 10, 23, 24, 17, 14, 2]], [[18, 28, 30, 13, 8, 16, 28, 16, 28, 12, 13, 25, 12, 28, 19, 25], [25, 19, 28, 12, 25, 13, 12, 28, 16, 28, 16, 8, 13, 30, 28, 18, 2]], [[16, 22, 6, 24, 10, 11, 17, 5, 26, 16, 17, 16, 30, 12, 22, 6], [6, 22, 12, 30, 16, 17, 16, 26, 5, 17, 11, 10, 24, 6, 22, 16, 2]], [[27, 30, 8, 8, 11, 28, 22, 18, 14, 4, 29], [14, 18, 22, 28, 11, 8, 8, 30, 27, 4, 29, 2]], [[26, 30, 12, 7, 4, 21, 26, 23, 25, 6, 27, 30, 6, 20], [7, 12, 30, 26, 4, 20, 6, 30, 27, 6, 25, 23, 26, 21, 2]], [[28, 17, 13, 9, 11, 8, 9, 17, 25, 14, 29, 9, 7, 25, 16, 5, 5], [5, 5, 16, 25, 7, 9, 29, 14, 25, 17, 9, 8, 11, 9, 13, 17, 28, 2]]], [[[28, 20, 4, 11, 13, 25, 30, 20, 16, 29, 27, 9, 6, 25, 23, 6, 5, 29, 7, 7, 2

In [3]:
# Model dimensions (train() reports them as "layers" and "units").
num_layers = 2
model_size = 64

# Optimisation settings handed to Seq2SeqModel.
batch_size = 64
max_gradient_norm = 5.0
learning_rate = 1.0
learning_rate_decay_factor = 0.9

# Training schedule and checkpointing.
num_steps = 10001
steps_per_checkpoint = 100
train_dir = '/tmp'

# num_steps batches' worth of generated samples.
max_train_data_size = num_steps * batch_size

In [4]:
def create_model(session, forward_only):
  """Build the Seq2SeqModel and load or initialise its parameters.

  If train_dir holds a checkpoint whose files exist, the parameters are
  restored from it; otherwise all global variables are initialised.

  Args:
    session: TensorFlow session used to restore or initialise variables.
    forward_only: forwarded unchanged to Seq2SeqModel (train() passes False,
      decode() passes True).

  Returns:
    The constructed Seq2SeqModel.
  """
  model = seq2seq_model.Seq2SeqModel(
      vocabulary_size,
      vocabulary_size,
      _buckets,
      model_size,
      num_layers,
      max_gradient_norm,
      batch_size,
      learning_rate,
      learning_rate_decay_factor,
      use_lstm=True,
      forward_only=forward_only,
      dtype=tf.float32)

  checkpoint_state = tf.train.get_checkpoint_state(train_dir)
  if not (checkpoint_state and
          tf.train.checkpoint_exists(checkpoint_state.model_checkpoint_path)):
    # Nothing to resume from: start with freshly initialised variables.
    print("Created model with fresh parameters.")
    session.run(tf.global_variables_initializer())
    return model

  print("Reading model parameters from %s" % checkpoint_state.model_checkpoint_path)
  model.saver.restore(session, checkpoint_state.model_checkpoint_path)
  return model

In [5]:
def train():
  """Train the seq2seq model on generated data for num_steps steps.

  Generates max_train_data_size samples, then repeatedly picks a bucket with
  probability proportional to its size and runs one training step on a batch
  from it. Every steps_per_checkpoint steps it prints the global step,
  learning rate, mean step time and perplexity, decays the learning rate when
  the loss is worse than each of the last three recorded losses, and saves a
  checkpoint named "translate.ckpt" under train_dir.
  """
  with tf.Session() as sess:
    # Create model.
    print("Creating %d layers of %d units." % (num_layers, model_size))
    model = create_model(sess, False)

    # Read data into buckets and compute their sizes.
    # NOTE: `range` (imported from six.moves) is used throughout instead of
    # `xrange`, which does not exist on Python 3.
    train_set = generate_data(max_train_data_size)
    train_bucket_sizes = [len(train_set[b]) for b in range(len(_buckets))]
    train_total_size = float(sum(train_bucket_sizes))
    print("train total size %d" % train_total_size)

    # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
    # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
    # the size of the i-th training bucket, as used later.
    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in range(len(train_bucket_sizes))]

    # This is the training loop.
    step_time, loss = 0.0, 0.0
    current_step = 0
    previous_losses = []
    while current_step < num_steps:
      # Choose a bucket according to data distribution. We pick a random number
      # in [0, 1) and use the corresponding interval in train_buckets_scale.
      random_number_01 = np.random.random_sample()
      bucket_id = min([i for i in range(len(train_buckets_scale))
                       if train_buckets_scale[i] > random_number_01])

      # Get a batch and make a step.
      start_time = time.time()
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          train_set, bucket_id)
      _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                   target_weights, bucket_id, False)
      # Accumulate running means over the current checkpoint window.
      step_time += (time.time() - start_time) / steps_per_checkpoint
      loss += step_loss / steps_per_checkpoint
      current_step += 1

      # Once in a while, we save checkpoint, print statistics, and run evals.
      if current_step % steps_per_checkpoint == 0:
        # Print statistics for the previous window; exp() of a loss of 300 or
        # more is reported as infinity.
        perplexity = math.exp(float(loss)) if loss < 300 else float("inf")
        print("global step %d learning rate %.4f step-time %.2f perplexity "
              "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                        step_time, perplexity))
        # Decrease learning rate if no improvement was seen over last 3 times.
        if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
          sess.run(model.learning_rate_decay_op)
        previous_losses.append(loss)
        # Save checkpoint and zero timer and loss.
        checkpoint_path = os.path.join(train_dir, "translate.ckpt")
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)
        step_time, loss = 0.0, 0.0
        
# Run the training loop; checkpoints are written under train_dir.
train()


Creating 2 layers of 64 units.
Created model with fresh parameters.
train total size 640064
global step 100 learning rate 1.0000 step-time 0.67 perplexity 26.67
global step 200 learning rate 1.0000 step-time 0.70 perplexity 21.94
global step 300 learning rate 1.0000 step-time 0.67 perplexity 18.51
global step 400 learning rate 1.0000 step-time 0.65 perplexity 16.21
global step 500 learning rate 1.0000 step-time 0.65 perplexity 13.51
global step 600 learning rate 1.0000 step-time 0.64 perplexity 10.79
global step 700 learning rate 1.0000 step-time 0.67 perplexity 9.27
global step 800 learning rate 1.0000 step-time 0.68 perplexity 7.82
global step 900 learning rate 1.0000 step-time 0.66 perplexity 6.54
global step 1000 learning rate 1.0000 step-time 0.69 perplexity 5.89
global step 1100 learning rate 1.0000 step-time 0.67 perplexity 5.00
global step 1200 learning rate 1.0000 step-time 0.68 perplexity 4.47
global step 1300 learning rate 1.0000 step-time 0.66 perplexity 4.07
global step 14

In [5]:
def decode(sentence="the quick brown fox"):
  """Greedily decode one sentence with the model and print the result.

  The model comes from create_model, which restores the checkpoint found in
  train_dir when there is one. The output is the per-position argmax of the
  logits, cut at the first EOS symbol and converted back to characters.

  Args:
    sentence: text to feed to the model; each character is converted with
      char2id.
  """
  with tf.Session() as sess:
    # Create model and load parameters.
    model = create_model(sess, True)
    model.batch_size = 1  # We decode one sentence at a time.

    # Get token-ids for the input sentence.
    token_ids = [char2id(x) for x in sentence]
    # Which bucket does it belong to? Take the first one that is long enough.
    bucket_id = len(_buckets) - 1
    for i, bucket in enumerate(_buckets):
      if bucket[0] >= len(token_ids):
        bucket_id = i
        break
    else:
      # The loop ended without a break: no bucket is long enough, so the
      # largest bucket (set above) is used. The `else` belongs to the `for`,
      # so this fires once, not once per too-small bucket.
      # NOTE(review): token_ids is not shortened here; presumably get_batch
      # handles the overflow - confirm.
      logging.warning("Sentence truncated: %s", sentence)

    # Get a 1-element batch to feed the sentence to the model.
    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
        {bucket_id: [(token_ids, [])]}, bucket_id)
    # Get output logits for the sentence.
    _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                     target_weights, bucket_id, True)
    # This is a greedy decoder - outputs are just argmaxes of output_logits.
    outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
    # If there is an EOS symbol in outputs, cut them at that point.
    if data_utils.EOS_ID in outputs:
      outputs = outputs[:outputs.index(data_utils.EOS_ID)]
    print(''.join([id2char(output) for output in outputs]))
    
# Decode the sample sentence with the trained model.
decode()
      

Reading model parameters from /tmp/translate.ckpt-10000
eht kciuq nworb xof
