# Authorship Style Transfer

In [1]:
import numpy as np
import tensorflow as tf

---

## Data Preprocessing

In [2]:
text_file_path = "data/c50-articles.txt"
label_file_path = "data/c50-labels.txt"

### Conversion of texts into integer sequences

In [3]:
VOCAB_SIZE = 1000
MAX_SEQUENCE_LENGTH = 100
EMBEDDING_SIZE = 300

In [4]:
text_tokenizer = tf.keras.preprocessing.text.Tokenizer(
    num_words=VOCAB_SIZE, filters='!"#$%&()*+,-./:;=?@[\\]^_`{|}~\t\n')

with open(text_file_path) as text_file:
    text_tokenizer.fit_on_texts(text_file)
    
with open(text_file_path) as text_file:
    integer_text_sequences = text_tokenizer.texts_to_sequences(text_file)

padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(
     integer_text_sequences, maxlen=MAX_SEQUENCE_LENGTH, padding='post', truncating='post')

print(padded_sequences.shape)

(2500, 100)


In [5]:
SOS_INDEX = text_tokenizer.word_index['<sos>']
EOS_INDEX = text_tokenizer.word_index['<eos>']
print(SOS_INDEX, EOS_INDEX)

53 54


### Conversion of labels to one-hot represenations

In [6]:
label_tokenizer =  tf.keras.preprocessing.text.Tokenizer(lower=False)

with open(label_file_path) as label_file:
    label_tokenizer.fit_on_texts(label_file)

with open(label_file_path) as label_file:
    label_sequences = label_tokenizer.texts_to_sequences(label_file)

NUM_LABELS = len(label_tokenizer.word_index)
one_hot_labels = np.asarray(list(
    map(lambda x: np.eye(NUM_LABELS, k=x[0])[0], label_sequences)))

print(one_hot_labels.shape)

(2500, 50)


---

## Deep Learning Model

### Setup Instructions

In [151]:
class GenerativeAdversarialNetwork():

    def __init__(self):
        self.build_model()
        
    def get_sentence_representation(self, index_sequence, word_embeddings):
        
        embedded_sequence = tf.nn.embedding_lookup(
            word_embeddings, index_sequence, name="embedded_sequence")

        lstm_cell_fw = tf.contrib.rnn.BasicLSTMCell(num_units=128, name="lstm_cell_fw_content")
        lstm_cell_bw = tf.contrib.rnn.BasicLSTMCell(num_units=128, name="lstm_cell_bw_content")

        rnn_outputs, rnn_states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=lstm_cell_fw, cell_bw=lstm_cell_bw, inputs=embedded_sequence, 
            dtype=tf.float32, time_major=False)
        rnn_state = tf.concat([rnn_states[0].h, rnn_states[1].h], axis=1)

        return rnn_state

    def get_content_representation(self, sentence_representation):
        
        dense_content = tf.layers.dense(
            inputs=sentence_representation, units=128, 
            activation=tf.nn.relu, name="content_representation")

        return dense_content

    def get_style_representation(self, sentence_representation):
        
        dense_style = tf.layers.dense(
            inputs=sentence_representation, units=128, 
            activation=tf.nn.relu, name="style_representation")
        return dense_style

    def get_label_prediction(self, content_representation):

        dense_1 = tf.layers.dense(
            inputs=content_representation, units=NUM_LABELS, 
            activation=tf.nn.relu, name="dense_1")
        
        softmax_output = tf.nn.softmax(dense_1, name="label_prediction")

        return softmax_output
    
    def generate_output_sequence(self, word_embeddings, style_representation, 
                                 content_representation):
        last_predicted_word_index = SOS_INDEX
        predicted_words = list()
        
        for i in range(MAX_SEQUENCE_LENGTH):
            last_word_embedding = tf.gather_nd(
                word_embeddings, [last_predicted_word_index], name='last_word_embedding')
#             print("last_word_embedding: {}".format(last_word_embedding))
            
            batch_size = tf.shape(style_representation)[0]
#             print("batch_size: {}".format(batch_size))
            
            tiled_last_word_embedding = tf.tile(
                input=last_word_embedding,  multiples=[batch_size], name="tiled_last_word_embedding")
#             print("tiled_last_word_embedding: {}".format(tiled_last_word_embedding))
            
            matrix = tf.reshape(tiled_last_word_embedding, [batch_size, 300])
#             print("matrix: {}".format(matrix))
        
            intermediate_representation = tf.concat(
                values=[matrix, style_representation, content_representation],
                axis=1, name='intermediate_representation')
#             print("intermediate_representation: {}".format(intermediate_representation))
            
            dense_intermediate_representation = tf.layers.dense(
                inputs=intermediate_representation, units=VOCAB_SIZE + 1, 
                activation=tf.nn.relu, name="dense_intermediate_representation", 
                reuse=tf.AUTO_REUSE)
            
            softmax = tf.nn.softmax(
                dense_intermediate_representation, name="softmax")
            
#             word_prediction = tf.contrib.seq2seq.hardmax(
#                 softmax, name="word_prediction")
#             print("word_prediction: {}".format(word_prediction))

            predicted_words.append(softmax)
            
            last_predicted_word_index = tf.argmax(
                softmax, axis=1, name="last_predicted_word_index")
            print("last_predicted_word_index: {}".format(last_predicted_word_index))
            
            if last_predicted_word_index == EOS_INDEX:
                break
                
        predicted_sequence = tf.stack(
            values=predicted_words, axis=1, name='stack')
        
        return predicted_sequence



    def build_model(self):
        # input variable - text sequence converted to an index sequence
        self.input_sequence = tf.placeholder(
            tf.int32, [None, MAX_SEQUENCE_LENGTH], name="input_sequence")
        print("input_sequence: {}".format(self.input_sequence))

        self.input_label = tf.placeholder(
            tf.float32, [None, NUM_LABELS], name="input_label")
        print("input_label: {}".format(self.input_label))

        # learn embeddings matrix - can be initialized with pre-trained embeddings
        word_embeddings = tf.get_variable(
            shape=[VOCAB_SIZE + 1, EMBEDDING_SIZE], name="word_embeddings", 
            dtype=tf.float32)
        print("word_embeddings: {}".format(word_embeddings))

        # get sentence representation
        sentence_representation = self.get_sentence_representation(self.input_sequence, word_embeddings)
        print("sentence_representation: {}".format(sentence_representation))

        # get content representation
        content_representation = self.get_content_representation(sentence_representation)
        print("content_representation: {}".format(content_representation))

        # use content representation to predict a label
        self.label_prediction = self.get_label_prediction(content_representation)
        print("label_prediction: {}".format(self.label_prediction))

        self.adversarial_loss = tf.losses.softmax_cross_entropy(
            onehot_labels=self.input_label, logits=self.label_prediction)
        print("adversarial_loss: {}".format(self.adversarial_loss))

        self.adversarial_loss_summary = tf.summary.scalar(
            tensor=self.adversarial_loss, name="adversarial_loss")

        # get style representation
        style_representation = self.get_style_representation(sentence_representation)
        print("style_representation: {}".format(style_representation))
        
        # generate new sentence
        generated_logits = self.generate_output_sequence(
            word_embeddings, style_representation, content_representation)
        print("generated_logits: {}".format(generated_logits))
        
        self.reconstruction_loss = tf.contrib.seq2seq.sequence_loss(
            logits=generated_logits, targets=self.input_sequence, 
            weights=tf.ones(tf.shape(self.input_sequence)), name="reconstruction_loss")
        print("reconstruction_loss: {}".format(self.reconstruction_loss))

        self.reconstruction_loss_summary = tf.summary.scalar(
            tensor=self.reconstruction_loss, name="reconstruction_loss")


    def train(self, sess):

        writer = tf.summary.FileWriter(logdir="tensorflow_logs")
        
        adversarial_training_operation = tf.train.AdamOptimizer().minimize(self.adversarial_loss)
        
        print(tf.trainable_variables())
        reconstruction_training_operation = tf.train.AdamOptimizer().minimize(self.reconstruction_loss)
        
        sess.run(tf.global_variables_initializer())

        epoch_reporting_interval = 1
        training_examples_fraction = 0.9
        training_examples_size = int(training_examples_fraction * len(one_hot_labels))
        batch_size = 100
        training_epochs = 10
        num_batches = int(training_examples_size/batch_size)

        training_step = 1
        for current_epoch in range(1, training_epochs + 1):
            for batch_number in range(num_batches):
                _, adv_loss, adv_loss_sum, _, rec_loss, rec_loss_sum = sess.run(
                    [adversarial_training_operation, self.adversarial_loss, self.adversarial_loss_summary,
                     reconstruction_training_operation, self.reconstruction_loss, self.reconstruction_loss_summary], 
                    feed_dict={
                        self.input_sequence: padded_sequences[
                            batch_number * batch_size : (batch_number + 1) * batch_size],
                        self.input_label: one_hot_labels[
                            batch_number * batch_size : (batch_number + 1) * batch_size]})
                writer.add_summary(adv_loss_sum, training_step)
                writer.add_summary(adv_loss_sum, training_step)
                writer.flush()
                training_step += 1

            if (current_epoch % epoch_reporting_interval == 0):
                print("Training epoch: {}; Adversarial Loss: {}; Reconstruction Loss: {}"
                      .format(current_epoch, adv_loss, rec_loss))

        training_predictions = sess.run(
            self.label_prediction, 
            feed_dict={
                self.input_sequence: padded_sequences[:training_examples_size], 
                self.input_label: one_hot_labels[:training_examples_size]
            })

        test_predictions = sess.run(
            self.label_prediction, 
            feed_dict={
                self.input_sequence: padded_sequences[training_examples_size:], 
                self.input_label: one_hot_labels[training_examples_size:]
            })

        writer.close()

### Train Network

In [152]:
tf.reset_default_graph()
gan = GenerativeAdversarialNetwork()

input_sequence: Tensor("input_sequence:0", shape=(?, 100), dtype=int32)
input_label: Tensor("input_label:0", shape=(?, 50), dtype=float32)
word_embeddings: <tf.Variable 'word_embeddings:0' shape=(1001, 300) dtype=float32_ref>
sentence_representation: Tensor("concat:0", shape=(?, 256), dtype=float32)
content_representation: Tensor("content_representation/Relu:0", shape=(?, 128), dtype=float32)
label_prediction: Tensor("label_prediction:0", shape=(?, 50), dtype=float32)
adversarial_loss: Tensor("softmax_cross_entropy_loss/value:0", shape=(), dtype=float32)
style_representation: Tensor("style_representation/Relu:0", shape=(?, 128), dtype=float32)
generated_logits: Tensor("stack:0", shape=(?, 100, 1001), dtype=float32)
reconstruction_loss: Tensor("reconstruction_loss/truediv:0", shape=(), dtype=float32)


In [153]:
with tf.Session() as sess:
    gan.train(sess)

[<tf.Variable 'word_embeddings:0' shape=(1001, 300) dtype=float32_ref>, <tf.Variable 'bidirectional_rnn/fw/lstm_cell_fw_content/kernel:0' shape=(428, 512) dtype=float32_ref>, <tf.Variable 'bidirectional_rnn/fw/lstm_cell_fw_content/bias:0' shape=(512,) dtype=float32_ref>, <tf.Variable 'bidirectional_rnn/bw/lstm_cell_bw_content/kernel:0' shape=(428, 512) dtype=float32_ref>, <tf.Variable 'bidirectional_rnn/bw/lstm_cell_bw_content/bias:0' shape=(512,) dtype=float32_ref>, <tf.Variable 'content_representation/kernel:0' shape=(256, 128) dtype=float32_ref>, <tf.Variable 'content_representation/bias:0' shape=(128,) dtype=float32_ref>, <tf.Variable 'dense_1/kernel:0' shape=(128, 50) dtype=float32_ref>, <tf.Variable 'dense_1/bias:0' shape=(50,) dtype=float32_ref>, <tf.Variable 'style_representation/kernel:0' shape=(256, 128) dtype=float32_ref>, <tf.Variable 'style_representation/bias:0' shape=(128,) dtype=float32_ref>, <tf.Variable 'dense_intermediate_representation/kernel:0' shape=(556, 1001) dt

InvalidArgumentError: index innermost dimension length must be <= params rank; saw: 100 vs. 2
	 [[Node: last_word_embedding_1 = GatherNd[Tindices=DT_INT64, Tparams=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"](word_embeddings/read, last_word_embedding_1/indices)]]
	 [[Node: softmax_cross_entropy_loss/value/_429 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_5204_softmax_cross_entropy_loss/value", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'last_word_embedding_1', defined at:
  File "/usr/lib64/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib64/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/v2john/.pyenv/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/v2john/.pyenv/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/v2john/.pyenv/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/home/v2john/.pyenv/lib/python3.6/site-packages/tornado/ioloop.py", line 832, in start
    self._run_callback(self._callbacks.popleft())
  File "/home/v2john/.pyenv/lib/python3.6/site-packages/tornado/ioloop.py", line 605, in _run_callback
    ret = callback()
  File "/home/v2john/.pyenv/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/v2john/.pyenv/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 536, in <lambda>
    self.io_loop.add_callback(lambda : self._handle_events(self.socket, 0))
  File "/home/v2john/.pyenv/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/v2john/.pyenv/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/v2john/.pyenv/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/home/v2john/.pyenv/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/v2john/.pyenv/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/v2john/.pyenv/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/v2john/.pyenv/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/v2john/.pyenv/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/v2john/.pyenv/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-152-90781ad721a9>", line 2, in <module>
    gan = GenerativeAdversarialNetwork()
  File "<ipython-input-151-4aadc325bc58>", line 4, in __init__
    self.build_model()
  File "<ipython-input-151-4aadc325bc58>", line 140, in build_model
    word_embeddings, style_representation, content_representation)
  File "<ipython-input-151-4aadc325bc58>", line 53, in generate_output_sequence
    word_embeddings, [last_predicted_word_index], name='last_word_embedding')
  File "/home/v2john/.pyenv/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 2000, in gather_nd
    "GatherNd", params=params, indices=indices, name=name)
  File "/home/v2john/.pyenv/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/v2john/.pyenv/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3160, in create_op
    op_def=op_def)
  File "/home/v2john/.pyenv/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1625, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): index innermost dimension length must be <= params rank; saw: 100 vs. 2
	 [[Node: last_word_embedding_1 = GatherNd[Tindices=DT_INT64, Tparams=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"](word_embeddings/read, last_word_embedding_1/indices)]]
	 [[Node: softmax_cross_entropy_loss/value/_429 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_5204_softmax_cross_entropy_loss/value", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
