In [203]:
import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn
import random 
import collections
import time

start_time = time.time()

def elapsed(sec):
    if sec<60:
        return str(sec) + " sec"
    elif sec<(60*60):
        return str(sec/60) + " min"

In [204]:
log_path = './tensorflow/logs'
writer = tf.summary.FileWriter(log_paths)
training_file = './input/paragraph.txt'

In [205]:
def read_data(fname):
    with open(fname) as f:
        content = f.readlines()
    content = [x.strip() for x in content]
    content = [content[i].split() for i in range(len(content))]
    content = np.array(content)
    content = np.reshape(content, [-1, ])
    return content

training_data = read_data(training_file)
print("Loaded training data....")

Loaded training data....


In [206]:
def build_dataset(words):
    count = collections.Counter(words).most_common()
    dictionary = dict()
    for word, _ in count:
        dictionary[word] = len(dictionary)
    reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return dictionary, reverse_dictionary

dictionary, reverse_dictionary = build_dataset(training_data)

In [207]:
vocab_size = len(dictionary)
learning_rate = 1e-3
training_iters = 50000
display_step = 1000
n_input = 3
n_hidden = 512
tf.reset_default_graph()
x = tf.placeholder("float", [None, n_input, 1])
y = tf.placeholder("float", [None, vocab_size])

#RNN output node weights and biases
weights = {
    'out' : tf.Variable(tf.random_normal([n_hidden, vocab_size]))
}
biases = {
    'out' : tf.Variable(tf.random_normal([vocab_size]))
}


In [208]:
def RNN(x, weights, biases):
    #reshape to [1, n_input]
    x = tf.reshape(x, [-1, n_input])
    x = tf.split(x, n_input, 1)
    rnn_cell = rnn.MultiRNNCell([rnn.BasicLSTMCell(n_hidden), rnn.BasicLSTMCell(n_hidden)])
    
    outputs, states = rnn.static_rnn(rnn_cell, x, dtype = tf.float32)
    return tf.matmul(outputs[-1], weights['out']) + biases['out']

In [209]:
pred = RNN(x, weights, biases)

In [210]:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)

In [211]:
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [212]:
init = tf.global_variables_initializer()

In [1]:
from tqdm import tqdm
with tf.Session() as session:
    session.run(init)
    offset = random.randint(0, n_input+1)
    end_offset = n_input+1
    acc_total = 0
    loss_total = 0
    
    writer.add_graph(session.graph)
    for step in tqdm(range(training_iters)):
        if offset > (len(training_data)-end_offset):
            offset = random.randint(0, n_input+1)
        symbols_in_keys = [ [dictionary[str(training_data[i])]] for i in range(offset, offset+n_input)]
        symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])
        
        symbols_out_onehot = np.zeros([vocab_size], dtype = float)
        symbols_out_onehot[dictionary[str(training_data[offset+n_input])]] = 1.0
        symbols_out_onehot = np.reshape(symbols_out_onehot, [1, -1])
        
        _, acc, loss, onehot_pred = session.run([optimizer, accuracy, cost, pred], \
                                               feed_dict = {x: symbols_in_keys, y: symbols_out_onehot})
    
        loss_total += loss
        acc_total += acc
        if (step+1)%display_step == 0:
            print("Iter= " + str(step+1) + ", Average loss= " + \
                 "{:.6f}".format(loss_total/display_step) + ", Average Accuracy= " + \
                 "{:.2f}%".format(100*acc_total/display_step))
            acc_total = 0
            loss_total = 0
            symbols_in = [training_data[i] for i in range(offset, offset+n_input)]
            symbols_out = training_data[offset+n_input]
            symbols_out_pred = reverse_dictionary[int(tf.argmax(onehot_pred, 1).eval())]
            print("%s - [%s] vs [%s]" % (symbols_in, symbols_out, symbols_out_pred))
        step += 1
        offset += (n_input+1)
    print("Optimization Finished!")
    print("Elapsed time: ", elapsed(time.time() - start_time))
    print("Run on command line.")
    print("\ttensorboard --logdir=%s" % (log_path))
    print("Point your web browser to: http://localhost:6006/")
    while True:
    prompt = "%s words: " % n_input
    sentence = input(prompt)
    sentence = sentence.strip()
    words = sentence.split(' ')
    if len(words) != n_input:
        continue
    try:
        symbols_in_keys = [dictionary[str(words[i])] for i in range(len(words))]
        for i in range(32):
            keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])
            onehot_pred = session.run(pred, feed_dict={x: keys})
            onehot_pred_index = int(tf.argmax(onehot_pred, 1).eval())
            sentence = "%s %s" % (sentence, reverse_dictionary[onehot_pred_index])
            symbols_in_keys = symbols_in_keys[1:]
            symbols_in_keys.append(onehot_pred_index)
        print(sentence)
    except:
        print("Word not in the dictionary!")

SyntaxError: invalid syntax (<ipython-input-1-217ffbc6e3c4>, line 5)