In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1"

import string
import random
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.contrib import seq2seq

from sklearn.utils import shuffle

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters


In [2]:
# --- hyperparameters -------------------------------------------------------
max_word_length = 12   # width every word is padded to with '_'
embed_size = 16        # size of each character embedding vector
lstm_size = 128        # units in the encoder and decoder LSTM cells
epochs = 30000         # training steps; each step fits one randomly drawn word

In [3]:
# read the whole word list into memory, then split it into one entry per line
with open('words/google-10000-english-usa-no-swears.txt', 'r') as word_file:
    raw_text = word_file.read()

words = raw_text.split('\n')
    
# do a little bit of cleaning just in case
def only_letters(some_string):
    """Return `some_string` with every character that is not an ASCII letter removed.

    Upper- and lower-case ASCII letters are both kept; digits, punctuation,
    whitespace and non-ASCII characters are dropped.
    """
    kept = []
    for ch in some_string:
        if ch in string.ascii_letters:
            kept.append(ch)
    return ''.join(kept)

def pad_front(some_word, length=None):
    """Left-pad `some_word` with '_' until it is at least `length` characters long.

    Args:
        some_word: the string to pad.
        length: minimum width of the result. When omitted, the notebook-wide
            `max_word_length` is used.

    Returns:
        The padded string. A word that is already `length` characters or
        longer is returned unchanged (it is never truncated).
    """
    if length is None:
        length = max_word_length

    # str.rjust pads in one step instead of rebuilding the string per character
    return some_word.rjust(length, '_')

def pad_rear(some_word, length=None):
    """Right-pad `some_word` with '_' until it is at least `length` characters long.

    Args:
        some_word: the string to pad.
        length: minimum width of the result. When omitted, the notebook-wide
            `max_word_length` is used.

    Returns:
        The padded string. A word that is already `length` characters or
        longer is returned unchanged (it is never truncated).
    """
    if length is None:
        length = max_word_length

    # str.ljust pads in one step instead of rebuilding the string per character
    return some_word.ljust(length, '_')

# Clean first and filter on the *cleaned* length: an entry such as "a-b" or
# "123" would pass a raw-length check and then shrink below 3 letters (or to
# an empty string, which make_piglatin cannot index into).
# The upper bound keeps '>' + word + 'ay' within max_word_length (1 + 9 + 2 = 12).
words = [wrd for wrd in map(only_letters, words) if 2 < len(wrd) < 10]

print('Found {} words'.format(len(words)))
print(words[7000])

Found 8124 words
sandwich


In [4]:
# create program to generate pig latin
def make_piglatin(some_word):
    """Build one (encoder input, decoder target) pair for `some_word`.

    The translation rule is the simplified one used throughout this notebook:
    a word starting with a vowel just gets 'ay' appended; otherwise only the
    first letter is moved to the end before 'ay' is appended. The target is
    prefixed with the '>' start token.

    Args:
        some_word: a non-empty word made of ASCII letters.

    Returns:
        A tuple `(old_word, new_word)` where `old_word` is `some_word`
        front-padded with '_' and `new_word` is the translated word
        rear-padded with '_', e.g. 'cameras' -> ('_____cameras', '>amerascay__').

    Raises:
        IndexError: if `some_word` is empty.
    """
    vowels = ('a', 'e', 'i', 'o', 'u')
    ay = 'ay'

    # Compare case-insensitively: only_letters() keeps capitals, so a word
    # like "Apple" must still be recognised as vowel-initial.
    if some_word[0].lower() in vowels:
        new_word = '>' + some_word + ay
    else:
        new_word = '>' + some_word[1:] + some_word[0] + ay

    new_word = pad_rear(new_word)
    old_word = pad_front(some_word)

    return old_word, new_word

# quick sanity check on a single word from the list
sample_word = words[-7000]
old_word, new_word = make_piglatin(sample_word)
print(old_word, new_word)
print(sample_word, make_piglatin(sample_word))

_____cameras >amerascay__
cameras ('_____cameras', '>amerascay__')


In [5]:
# lookup tables for converting characters to integer ids and back again;
# the alphabet is every ASCII letter plus '>' and '_'
chars = string.ascii_letters + '>_'

idx2char = dict(enumerate(chars))
char2idx = {ch: i for i, ch in idx2char.items()}

vocab_size = len(chars)

In [6]:
# set up the placeholders for our computational graph
# encoder input: character indices for one word (the batch dimension is fixed at 1);
# the node name 'encoder_input' is referenced again when the frozen graph is exported
inputs = tf.placeholder(shape=[1, None], dtype=tf.int32, name='encoder_input')
# decoder input: character indices fed to the decoder during teacher-forced training
decoder_inputs = tf.placeholder(shape=[None, None], dtype=tf.int32, name='decoder_input')
# training targets: one row of width vocab_size per decoder step (filled with one-hot rows)
targets = tf.placeholder(shape=[None, vocab_size], dtype=tf.int32, name='targets')

# create an initializer to init our weight matrices
# NOTE(review): the name `init` is rebound to tf.global_variables_initializer()
# in the training cell, so this initializer is only reachable before that point
init = tf.random_normal_initializer(stddev=0.1)

In [7]:
# set up the embedding layer
# a single [vocab_size, embed_size] table, shared by the encoder and decoder lookups below
embeddings = tf.get_variable("embeddings", [vocab_size, embed_size], initializer=init)

# embed the encoder's character indices (lookup pinned to the CPU)
with tf.device('/cpu:0'), tf.name_scope("input_embedding"):
    encoder_embedding = tf.nn.embedding_lookup(embeddings, inputs)

# embed the decoder's teacher-forcing character indices (lookup pinned to the CPU)
with tf.device('/cpu:0'), tf.name_scope("output_embedding"):
    decoder_embedding = tf.nn.embedding_lookup(embeddings, decoder_inputs)

In [8]:

# create the encoder LSTM
# Only the final LSTM state is kept; it seeds the decoder further down.
# NOTE(review): output_keep_prob masks the per-step *outputs*, which are
# discarded here (`_`), so this dropout presumably has no effect on
# encoder_last_state -- confirm against the DropoutWrapper documentation.
with tf.variable_scope('encoder') as enc_scope:
    lstm_encoder = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.BasicLSTMCell(lstm_size),
                                                 output_keep_prob=0.65)
    
    # batch-major input: [batch, time, embed_size]
    _, encoder_last_state = tf.nn.dynamic_rnn(lstm_encoder,
                                                   inputs=encoder_embedding, 
                                                   dtype=tf.float32, 
                                                   time_major=False)
    
# Inference helper: starts from the '>' token and greedily feeds each
# predicted character back in until the '_' pad/end token is produced.
inference_helper = seq2seq.GreedyEmbeddingHelper(embeddings,
                                                 start_tokens=[char2idx['>']],
                                                 end_token=char2idx['_'])

# Training helper (teacher forcing). The decoder is fed the padded target with
# its last character dropped, so the sequence length is max_word_length - 1;
# deriving it from the config keeps it in sync if max_word_length changes.
train_helper = seq2seq.TrainingHelper(inputs=decoder_embedding,
                                      sequence_length=[max_word_length - 1],
                                      time_major=False)

def decode(helper, scope_name, reuse=None, keep_prob=0.65):
    """Build a decoder sub-graph driven by `helper` and return its logits.

    Args:
        helper: a seq2seq helper that decides what is fed to the decoder at
            each step (teacher forcing for training, greedy for inference).
        scope_name: variable scope under which the decoder weights live.
        reuse: passed to `tf.variable_scope`; set to True to share the weights
            created by an earlier call with the same `scope_name`.
        keep_prob: keep probability for dropout on the decoder cell's output.
            Use 1.0 for inference so that no units are dropped.

    Returns:
        The projected decoder outputs (`rnn_output`), with the last dimension
        of size `vocab_size`.
    """
    with tf.variable_scope(scope_name, reuse=reuse):
        projection_layer = tf.layers.Dense(vocab_size, use_bias=False, name='Projection')
        lstm_decoder = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.BasicLSTMCell(lstm_size),
                                                     output_keep_prob=keep_prob)

        # the decoder starts from the encoder's final state
        decoder = seq2seq.BasicDecoder(lstm_decoder, helper, encoder_last_state,
                                       output_layer=projection_layer)
        outputs, _, _ = seq2seq.dynamic_decode(decoder, output_time_major=False,
                                               impute_finished=True, maximum_iterations=20)

        # get the output from the decoder
        return outputs.rnn_output


train_logits = decode(train_helper, 'decoder')

# DropoutWrapper has no notion of a training phase: with keep_prob < 1 it would
# also drop units while predicting (and inside the exported 'decoder_pred' node),
# so the inference graph shares the trained weights but disables dropout.
predictions = tf.argmax(decode(inference_helper, 'decoder', reuse=True, keep_prob=1.0),
                        -1, name='decoder_pred')

In [9]:
# Per-step cross entropy between the one-hot targets and the training logits.
# The _v2 op replaces the deprecated softmax_cross_entropy_with_logits; the
# labels come from a placeholder, so no gradient can flow into them either way.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=targets, logits=train_logits)
loss = tf.reduce_mean(cross_entropy)

# note: `optimizer` is the training op returned by minimize(), not the optimizer object
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.



In [10]:
# static shape of the training logits as a plain Python list
train_logits.shape.as_list()

[1, None, 54]

In [11]:
init = tf.global_variables_initializer()

# shuffle once, hold out the first 10 words for testing and train on the rest
words = shuffle(words)
test_words, words = words[:10], words[10:]

def one_hot(v):
    """Return one row of the vocab_size x vocab_size identity matrix per index in `v`."""
    identity = np.eye(vocab_size)
    return identity[v]

def _encode_chars(text):
    """Turn `text` into a (1, len(text)) array of vocabulary indices for feeding."""
    indices = [char2idx[c] for c in text]
    return np.asarray(indices).reshape(1, len(indices))


# Train the model, print teacher-forced spot checks every 1000 steps, run the
# greedy inference graph on the held-out words, then save a checkpoint and a
# frozen graph for later use.
with tf.Session() as sess:
    sess.run(init)
    for e in range(epochs):
        # every "epoch" is one optimisation step on a single randomly drawn word
        rng_idx = np.random.randint(len(words))

        input_word, targ_word = make_piglatin(words[rng_idx])

        input_vals = _encode_chars(input_word)
        # teacher forcing: the decoder is fed targ_word[:-1] and must predict targ_word[1:]
        decoder_vals = _encode_chars(targ_word[:-1])
        target_vals = one_hot([char2idx[c] for c in targ_word[1:]])

        _, loss_val = sess.run([optimizer, loss],
                               feed_dict={inputs: input_vals,
                                          decoder_inputs: decoder_vals,
                                          targets: target_vals})

        if e % 1000 == 0:
            print('epoch: {}, loss: {}'.format(e, loss_val))

            # spot check on the held-out words; this still goes through the
            # teacher-forced training graph, so the true target is fed in
            for test_word in test_words:
                val_word, val_targ = make_piglatin(test_word)

                prediction = sess.run(train_logits,
                                      feed_dict={inputs: _encode_chars(val_word),
                                                 decoder_inputs: _encode_chars(val_targ[:-1])})

                prediction = np.argmax(prediction, axis=-1)

                print("Given: {}, Predicted: {}".format(val_word,
                      ''.join([idx2char[idx] for idx in prediction[0]])))
            print()

    print()
    print('-------------------------------------')
    print('Inference mode')
    print('-------------------------------------')
    for test_word in test_words:
        # The encoder only ever saw front-padded words during training, so the
        # word has to be padded the same way here (and by any later consumer
        # of the exported graph).
        val_input = _encode_chars(pad_front(test_word))

        # for the inference mode we only pass the english word to translate
        prediction = sess.run(predictions, feed_dict={inputs: val_input})

        print("Given: {}, Predicted: {}".format(test_word,
              ''.join([idx2char[idx] for idx in prediction[0]])))
    print()

    # make sure the output directory exists before anything is written into it
    os.makedirs('./saved_translator', exist_ok=True)

    saver = tf.train.Saver()
    saver.save(sess, './saved_translator/pig_latin_encoder-decoder.ckpt')

    # will save this to create a pig latin translation application, best to use frozen graph
    relevant_nodes = ['encoder_input', 'decoder_pred']
    output_graph_def = tf.graph_util.convert_variables_to_constants(
        sess, # The session is used to retrieve the weights
        tf.get_default_graph().as_graph_def(), # The graph_def is used to retrieve the nodes
        relevant_nodes # The output node names are used to select the useful nodes
    )

    # Finally we serialize and dump the output graph to the filesystem
    with tf.gfile.GFile('./saved_translator/piglatin_enc-dec.pb', "wb") as f:
        f.write(output_graph_def.SerializeToString())

epoch: 0, loss: 3.990586996078491
Given: ______parade, Predicted: eeecceeceDe
Given: ________goal, Predicted: >ovoacczzzH
Given: ______jaguar, Predicted: emev_cvceev
Given: ______meetup, Predicted: DHDDlDePwdH
Given: ________eyed, Predicted: DeDDaPLeGH_
Given: _______about, Predicted: meP__oowwB_
Given: _________guy, Predicted: DovebeDDDHj
Given: _______maine, Predicted: eerLPPeeG_l
Given: ______matrix, Predicted: orvvccGcceG
Given: ____cylinder, Predicted: Leeeckccccc

epoch: 1000, loss: 1.6840102672576904
Given: ______parade, Predicted: aneneaay___
Given: ________goal, Predicted: anyaay_____
Given: ______jaguar, Predicted: aneeraay___
Given: ______meetup, Predicted: eneeeaay___
Given: ________eyed, Predicted: eleaay_____
Given: _______about, Predicted: aneeday____
Given: _________guy, Predicted: ot_ay______
Given: _______maine, Predicted: enlssay____
Given: ______matrix, Predicted: aneeaaay___
Given: ____cylinder, Predicted: oeeeeedaay_

epoch: 2000, loss: 1.8633555173873901
Given: _