In [1]:
import os
import sys
import time
import random
import pickle as pickle
import numpy as np
import tensorflow as tf
from utils import *
from nn import *

from tqdm import tqdm

Using TensorFlow backend.


## Import all the data

In [2]:
# import all the data
data = load_10_people()
X_train_ohe = data['X_train'].astype(np.float64)
X_train_nums = X_train_ohe.argmax(-1)
Y_train = data['Y_train'].astype(np.float64)

Loading 1004399 tweets from 4391 unique users.
Loading Twitter dataset took 2 seconds.
Number of Tweets: 97728
Only keeping characters that appear at least 100 times in the corpus
Character set consists of 246 characters
Building X...
Building Y...
Splitting Data...
79159 train char sequences
9773 test char sequences
8796 validation char sequences


## Some constants

In [3]:
# Create the encoder RNN with one GRU cell
vocab_size = 246
embedding_size = 100
max_seq_length = 140
n_layers = 1
batch_size = 50
nepochs = 10
dropout_rate = 0.5
dim_y = 10
dim_h = 100
learning_rate = 0.001

## Helper functions

In [4]:
def gru_cell(dim, n_layers, dropout):
    cell = tf.nn.rnn_cell.GRUCell(dim)
    cell = tf.nn.rnn_cell.DropoutWrapper(cell,
        input_keep_prob=dropout)
    if n_layers > 1:
        cell = tf.nn.rnn_cell.MultiRNNCell([cell] * n_layers)
    return cell

# fully-conected layer
def dense(x, inputFeatures, outputFeatures, scope):
    with tf.variable_scope(scope or "Linear"):
        matrix = tf.get_variable("Matrix", [inputFeatures, outputFeatures],
                                 tf.float32, tf.random_normal_initializer(stddev=0.02))
        bias = tf.get_variable("bias", [outputFeatures], initializer=tf.constant_initializer(0.0))
        
        return tf.matmul(x, matrix) + bias
    
# take the encoded sentence and try to predict mu and sigma
def recognition(latent_vector, dim_mu = dim_h, dim_s = dim_h):
        with tf.variable_scope("recognition"):
            w_mean = dense(latent_vector, dim_h, dim_mu, "w_mean")
            w_stddev = dense(latent_vector, dim_h, dim_s, "w_stddev")
        return w_mean, w_stddev
    
def reconstruction_loss(targets, logits):
    with tf.variable_scope("rec_loss"):
        loss_g = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.reshape(targets, [-1, vocab_size]), logits=logits)
        loss_g = tf.reduce_sum(loss_g) / tf.to_float(batch_size)

        preds = tf.argmax(tf.nn.softmax(logits), -1)
        goalz = tf.cast(tf.reshape(encoder_inputs, [-1]), tf.int64)
        accuracy = tf.reduce_mean(tf.cast(tf.equal(preds, goalz), tf.float32))
        
    return loss_g, accuracy

## The Network - AAE with adversarial

In [5]:
def discriminator(latent_encoding, labels):
    with tf.variable_scope("discriminator"):
        logits = dense(latent_encoding, dim_h, 10, "discriminator")
        disc_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = labels, logits = logits))
        
        preds = tf.argmax(tf.nn.softmax(logits), -1)
        goalz = tf.argmax(labels, -1)
        disc_acc = tf.reduce_mean(tf.cast(tf.equal(preds, goalz), tf.float32))
        
    return disc_loss, disc_acc

In [None]:
tf.reset_default_graph()

# Inputs
encoder_inputs = tf.placeholder(shape = [batch_size, max_seq_length], name = "input_sentences", dtype=tf.int32)
targets = tf.placeholder(shape = [batch_size, max_seq_length, vocab_size], name = "target_sentences", dtype=tf.int32)
labels = tf.placeholder(shape = [batch_size], name = 'labels', dtype=tf.int32)
labels = tf.reshape(labels, [-1, 1])

# Embedding matrices
# TensorShape([Dimension(246), Dimension(100)])
embedding_encoder = tf.get_variable("embedding_matrix", [vocab_size, embedding_size])
embedding_decoder = tf.get_variable("output_embedding_matrix", [vocab_size, embedding_size])

# Extract Embeddings
#TensorShape([Dimension(140), Dimension(50), Dimension(100)])
encoder_emb_inp = tf.nn.embedding_lookup(embedding_encoder, encoder_inputs, name = "encoder_embeddings")
encoder_emb_out = tf.nn.embedding_lookup(embedding_decoder, encoder_inputs, name = "decoder_embeddings")

# Encoder Net
# dim_h is the dimension of the hidden state
cell_e = create_cell(dim_h, n_layers, dropout_rate)
_, z = tf.nn.dynamic_rnn(cell_e, encoder_emb_inp, dtype = tf.float32, scope='encoder')

# Decoder Net
cell_g = create_cell(dim_h, n_layers, dropout_rate)
g_outputs, _ = tf.nn.dynamic_rnn(cell_g, encoder_emb_out,
            initial_state = z, scope='generator')
g_outputs = tf.nn.dropout(g_outputs, dropout_rate)
# flatten all the outputs and take through final FC layer for classification
g_outputs = tf.reshape(g_outputs, [-1, dim_h])
g_logits = dense(g_outputs, dim_h, vocab_size, scope='output_fc')

# Losses
disc_loss, disc_acc = discriminator(z,tf.reshape(tf.one_hot(labels, 10),[batch_size, -1]) )
rec_loss, rec_acc = reconstruction_loss(targets, g_logits)
rho = 0.3
total_loss = rec_loss + rho*disc_loss

# optimizer
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

## Training

In [None]:
sess = tf.Session()
sess.run([tf.global_variables_initializer()])

max_iter = 5000
summary_iter = 200

rec_accuracy_ = []
loss_ = []
disc_accuracy_ = []

for i in tqdm(range(max_iter)):
        sess.run([optimizer], feed_dict={encoder_inputs: X_train_nums[i:i+batch_size], 
                                                       targets: X_train_ohe[i:i+batch_size],
                                                       labels: np.argmax(Y_train[i:i+batch_size], 1)})
        if i % summary_iter == 0:
            random_ix = np.random.choice(np.arange(len(X_train_nums)),size = batch_size,replace=True)
            l, a, disc_a = sess.run([total_loss, rec_acc, disc_acc], feed_dict={encoder_inputs: X_train_nums[random_ix], 
                                                       targets: X_train_ohe[random_ix],
                                                       labels: np.argmax(Y_train[random_ix], 1)})
            
            # Get discriminator's accuracy
            print("Dicriminator Accuracy {}".format(disc_a))
            
            # Get total loss and reconstruction Accuracy
            print("Loss: {}".format(l))
            print("Accuracy {}".format(a))
            
            rec_accuracy_.append(a)
            loss_.append(l)
            disc_accuracy_.append(disc_a)

  0%|          | 1/5000 [00:01<2:14:35,  1.62s/it]

Dicriminator Accuracy 0.18000000715255737
Loss: 483.89935302734375
Accuracy 0.01842857152223587


  4%|▍         | 201/5000 [03:12<1:26:50,  1.09s/it]

Dicriminator Accuracy 0.2199999988079071
Loss: 256.6048583984375
Accuracy 0.14585714042186737


  8%|▊         | 401/5000 [06:36<1:22:12,  1.07s/it]

Dicriminator Accuracy 0.41999998688697815
Loss: 205.06509399414062
Accuracy 0.3107142746448517


 10%|█         | 522/5000 [08:35<1:11:18,  1.05it/s]