In [15]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
tf.enable_eager_execution()
import numpy as np
import os
import time
import pandas as pd

import re

In [16]:
# Read the whole speech corpus into memory as one string; `speeches` and
# `text` are two names for the same string.
with open('AllTrumpSpeechesCleaned.txt', encoding="utf8") as file:
    text = speeches = file.read()

In [17]:
# Every distinct character in the corpus, in sorted order.
vocab = sorted(set(text))
# Character -> integer id (the character's position in `vocab`).
char2idx = dict(zip(vocab, range(len(vocab))))
# Integer id -> character, as an array so it can be indexed by id.
idx2char = np.array(vocab)
# The full corpus encoded as one id per character.
text_as_int = np.array([char2idx[ch] for ch in text])

In [18]:
# Number of input characters per training example.
seq_length = 100
# Each example consumes seq_length+1 characters: the input plus one extra
# character so the target can be the input shifted by one position.
examples_per_epoch = len(text)//(seq_length+1)
# Dataset that yields the encoded corpus one character id at a time.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)


In [19]:
# Group consecutive character ids into chunks of seq_length+1; with
# drop_remainder=True a trailing chunk shorter than that is discarded.
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

In [20]:
def split_input_target(chunk):
    """Turn one chunk into an (input, target) pair offset by one position.

    The input is the chunk without its last element and the target is the
    chunk without its first element, so target[i] is the element that
    follows input[i] in the original chunk.
    """
    return chunk[:-1], chunk[1:]
# Convert every seq_length+1 chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)

In [21]:
# Number of (input, target) sequences per training batch.
BATCH_SIZE = 256

# Number of sequences held in the shuffle buffer that examples are drawn from.
BUFFER_SIZE = 10000

# Build the training pipeline from `sequences` rather than from `dataset`
# itself: rebinding `dataset` to a transformation of its previous value makes
# the cell non-idempotent (a second run would batch already-batched data).
# drop_remainder=True keeps every batch at exactly BATCH_SIZE rows, which the
# model needs because it is built with a fixed batch size.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

dataset

<DatasetV1Adapter shapes: ((256, 100), (256, 100)), types: (tf.int64, tf.int64)>

In [22]:
# Number of distinct characters; used as the embedding input size and as the
# width of the model's output layer.
vocab_size = len(vocab)

# Size of the vector each character id is embedded into.
embedding_dim = 64

# Number of units in each GRU layer.
rnn_units = 512

In [23]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the character-level model: Embedding -> GRU -> GRU -> Dense.

    Args:
        vocab_size: number of distinct character ids; used as the embedding
            input size and as the width of the final Dense layer.
        embedding_dim: size of the embedding vector for each character id.
        rnn_units: number of units in each of the two GRU layers.
        batch_size: fixed batch size baked into the model's input shape; the
            sequence-length dimension is left as None.

    Returns:
        A tf.keras.Sequential model. The final Dense layer is created without
        an activation argument.
    """
    def stateful_gru():
        # Both recurrent layers share the same configuration: they return an
        # output for every time step and are stateful.
        return tf.keras.layers.GRU(
            rnn_units,
            return_sequences=True,
            stateful=True,
            recurrent_initializer='glorot_uniform')

    layers = [
        tf.keras.layers.Embedding(
            vocab_size,
            embedding_dim,
            batch_input_shape=[batch_size, None]),
        stateful_gru(),
        stateful_gru(),
        tf.keras.layers.Dense(vocab_size),
    ]
    return tf.keras.Sequential(layers)

In [24]:
# Training model: built with the training batch size so its fixed input shape
# matches the batches produced by `dataset`.
model = build_model(
    vocab_size = len(vocab),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE)

# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (256, None, 64)           5760      
_________________________________________________________________
gru_2 (GRU)                  (256, None, 512)          886272    
_________________________________________________________________
gru_3 (GRU)                  (256, None, 512)          1574400   
_________________________________________________________________
dense_1 (Dense)              (256, None, 90)           46170     
Total params: 2,512,602
Trainable params: 2,512,602
Non-trainable params: 0
_________________________________________________________________


In [25]:
def loss(labels, logits):
    """Sparse categorical cross-entropy between integer labels and raw logits.

    from_logits=True tells the loss that `logits` are unnormalised scores
    rather than probabilities.
    """
    crossentropy = tf.keras.losses.sparse_categorical_crossentropy
    return crossentropy(labels, logits, from_logits=True)

In [26]:
# Configure training with the Adam optimizer and the logits-based loss above.
model.compile(optimizer='adam', loss=loss)

In [27]:
# Directory where the checkpoints will be saved
checkpoint_dir = 'training_checkpoints/'
# Checkpoint file name pattern; "{epoch}" is a placeholder left in the string
# for the checkpoint callback to fill in.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Callback that writes checkpoints during training; save_weights_only=True
# stores only the model weights, not the full model.
checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_prefix,
        save_weights_only=True)

In [45]:
# Number of passes over the dataset for this run of the cell.
EPOCHS = 1

# Resume from the most recent checkpoint when one exists.
# tf.train.latest_checkpoint returns None when the directory holds no
# checkpoint yet (e.g. the very first run), and load_weights cannot accept
# None, so a fresh run must skip the restore and train from the initial weights.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is not None:
    model.load_weights(latest_checkpoint)

history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])



In [49]:
# Inference copy of the model: same architecture, but built with batch size 1
# so a single seed string can be fed in at generation time.
p_model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

# Restore the weights from the most recent training checkpoint.
# NOTE(review): tf.train.latest_checkpoint returns None if no checkpoint has
# been written yet, so this cell assumes training has run at least once.
p_model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))


# Build with input shape (1, None): batch size 1, variable sequence length.
p_model.build(tf.TensorShape([1, None]))

In [50]:
def generate_text(model, start_string, num_generate=1000, temperature=1.0):
    """Sample text from a trained character-level model, one character at a time.

    Relies on the module-level `char2idx` and `idx2char` mappings.

    Args:
        model: model built with batch size 1 that maps a batch of character
            ids to per-position logits and supports reset_states().
        start_string: non-empty seed text; every character must be a key of
            `char2idx`.
        num_generate: number of characters to sample after the seed.
        temperature: positive divisor applied to the logits before sampling.
            Lower values give more predictable text, higher values give more
            surprising text.

    Returns:
        `start_string` followed by the `num_generate` sampled characters.

    Raises:
        ValueError: if `start_string` is empty or `temperature` is not positive.
        KeyError: if `start_string` contains a character missing from `char2idx`.
    """
    if not start_string:
        # An empty seed gives the model zero time steps, so there would be no
        # final-position logits to sample from.
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError(
            "temperature must be positive, got {}".format(temperature))

    unknown_chars = sorted(set(start_string) - set(char2idx))
    if unknown_chars:
        raise KeyError(
            "start_string contains characters outside the vocabulary: "
            "{!r}".format(unknown_chars))

    # Convert the seed to character ids and add the batch dimension.
    input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)

    # Sampled characters, joined into a string at the end.
    text_generated = []

    # Start from a clean recurrent state (batch size is 1 here).
    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)
        # Remove the batch dimension.
        predictions = tf.squeeze(predictions, 0)

        # Scale the logits, then sample from the categorical distribution they
        # define; [-1, 0] picks the sample drawn for the last input position.
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(
            predictions, num_samples=1)[-1, 0].numpy()

        # Only the sampled character is fed back as the next input; earlier
        # context is expected to be carried by the model's retained state.
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

In [51]:
# Generate and print a sample seeded with "Tremendous " from the inference model.
print(generate_text(p_model, start_string=u"Tremendous "))

Tremendous cash you like Congress Caterpillar mater. Her pupal patens. And now he’s the worst politicians should get out.
But then when are you undernic our women and wo sold and you’ll never for the vote. The policies here are knerc, lies that we’re up to hardloke and then the best in violence. Their people really taken care of plant through will be going to stop it and he’s not going to happen.
And you know what o know this? No?
You got tos rich is about with two carery. Recount is avaitybody. Because when he said, “Oh my best. Okay? You know, I’m me, today see that I can make it vire less stapped. That won’t come in.
But we’re going to Mexico great and that they can do to Michigan. Bet it did with two days.
People come out fast and somebody will take the appreciate in. The group hearts the heroic of the grateorm. Because if that was without cutting? They pays benefit leagest people in the world alones and their liest to run for – no good to show that very walls desperately from a fr

In [52]:
# Generate and print a sample seeded with "You will " from the inference model.
print(generate_text(p_model, start_string=u"You will "))

You will be consraluse totally illegal immigration – that you’ve totally called and make it up. Instead of study. The country go, and we could rnCw is rigged.
Here is drnigration. Make is great Trump unemestave because we have rising our country and bleat me. Gr, and they were the oil, keep by people with Chicago, many of the FBI to Washington Bridge living trood in Michigan – safe totacking me on trade and becoming the game in a woman instive lies and then we win the same direction neit spend away on The Apprentice, amazing people.
We are just turned with the voters, for loyto, Cruze sendeng to strible. They want to go back to.
And she said “Oh, TPP. ThanHe made the American people, and we are going to start winning from the real efform.
And he wasn’t understand.
But it ter censured that it’s alone going to say MacDoll Japan.
Legally dat lies. Doctore even though a rene? Remember? He said “It’s no ble the one chance of Dirminals.” And then the people like Christmashe profit. Believe m

In [53]:
# Generate and print a sample seeded with "I will " from the inference model.
print(generate_text(p_model, start_string=u"I will "))

I will sig decade in the world. Left Johnson, hotels would say nI think I’m farlent for the United States — with Japan.
So I’de ated the money becoming conservated cities throw reconsting up lik the peopleh war right now is apared by far. And he was going to class and run because he was released and through weeks, and they discussed great veterans I’ve been said 10,000 7000 and the dancer radical Islamic terrorists the hell out so badly for by far to growe this citizens of the United States to pay for it’s all immigration natural gas determine and general long and here’s unfair safe neighbodyou’ll a speech in terms of noching right after plant…
And we’re going to start winning too much loy”, I appreciate it.
And you’re going to be losing phery – which is going to be right over there? They say.
But I am running to build a great, great company but I think that was very nice.
Big, big difference between China, they’re in the san the way they get in. Nobody really done a big chunk of what 