# Text Generation with RNN
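A Recurrent Neural Network (RNN) is a type of neural network used to analyze sequential data, whether textual or visual. This notebook trains a character-level RNN on a recipes text file and then uses it to generate new text.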


## Importing Libraries

In [None]:
# Import the tensorflow library.
import tensorflow as tf

# Import the numpy library.
import numpy as np

# Import the os module (used later to build file paths).
import os

# Import the time library.
import time

## Reading the Recipes File

In [None]:
# Path to the "recipes.txt" file in your student folder.
path_to_file = "recipes.txt"

# Read the whole file as bytes and decode it as UTF-8. The "with" block
# guarantees the file handle is closed even if reading or decoding fails.
with open(path_to_file, "rb") as recipes_file:
    text = recipes_file.read().decode(encoding="utf8")

# Print the first 250 characters of the file.
print(text[:250])

FileNotFoundError: [Errno 2] No such file or directory: 'recipes.txt'

## Length and Unique Characters


In [None]:
# Print the length of the text.
print(len(text))

2390691


In [None]:
# Create a variable called vocab holding every distinct character in the text:
# set(text) keeps each character once, and sorted() turns that set into a list
# ordered by character code.
vocab=sorted(set(text))

# Print the vocab list.
print(vocab)

['\t', '\n', ' ', '!', '"', '#', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '=', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '\\', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '~']


In [None]:
# Print out the length of the vocab set.
len(vocab)

86

# Vectorizing


In [None]:
# Create a list called example_texts and add some random text.
example_texts=["the quick brown fox", "jumped"]

In [None]:
# Create a variable called chars, and split the text to tokenize it.
chars=tf.strings.unicode_split(example_texts, input_encoding="UTF-8")

# Print out the characters.
print(chars)

<tf.RaggedTensor [[b't', b'h', b'e', b' ', b'q', b'u', b'i', b'c', b'k', b' ', b'b', b'r',
  b'o', b'w', b'n', b' ', b'f', b'o', b'x']                              ,
 [b'j', b'u', b'm', b'p', b'e', b'd']]>


In [None]:
# Create a layer called ids_from_chars that will convert characters to ids using the vocab list.
# mask_token=None means no id is set aside for a masking token.
# In the printed output of this cell, ' ' (index 2 in vocab) maps to id 3, i.e. ids
# are shifted up by one relative to vocab — presumably because id 0 is taken by the
# layer's out-of-vocabulary token '[UNK]'.
ids_from_chars = tf.keras.layers.StringLookup(vocabulary=list(vocab), mask_token=None)

# Create a variable called ids and get the ids from the layer that was created above.
ids = ids_from_chars(chars)

# Print out the ids from the layer.
print(ids)

<tf.RaggedTensor [[79, 67, 64, 3, 76, 80, 68, 62, 70, 3, 61, 77, 74, 82, 73, 3, 65, 74, 83],
 [69, 80, 72, 75, 64, 63]]>


In [None]:
# Create a layer called chars_from_ids that will convert ids back to characters.
# invert=True reverses the lookup direction, and reusing
# ids_from_chars.get_vocabulary() keeps both layers on the same id <-> character mapping.
chars_from_ids = tf.keras.layers.StringLookup(vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None)

# Convert the ids back to characters. This overwrites chars with the round-tripped result.
chars = chars_from_ids(ids)

# Print out the characters from the layer.
print(chars)

<tf.RaggedTensor [[b't', b'h', b'e', b' ', b'q', b'u', b'i', b'c', b'k', b' ', b'b', b'r',
  b'o', b'w', b'n', b' ', b'f', b'o', b'x']                              ,
 [b'j', b'u', b'm', b'p', b'e', b'd']]>


In [None]:
# Create a function called text_from_ids to convert the ids into text.
def text_from_ids(ids):
    """Convert ids back into text.

    Looks up the character for every id with chars_from_ids, then joins the
    characters along the last axis into a single string per sequence.
    """
    char_tokens = chars_from_ids(ids)
    joined_text = tf.strings.reduce_join(char_tokens, axis=-1)
    return joined_text

print(text_from_ids(ids))

tf.Tensor([b'the quick brown fox' b'jumped'], shape=(2,), dtype=string)


# Creating the Training Data


## Create the Dataset

In [None]:
# Create a variable called all_ids: split the whole text into UTF-8 characters
# and convert every character into its id with ids_from_chars.
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))

# Print out all the ids and shape.
print(all_ids)

tf.Tensor([45 74 15 ... 16  2  2], shape=(2390691,), dtype=int64)


In [None]:
# Create a variable called ids_dataset and use TensorFlow to build a dataset with one element per id in all_ids.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [None]:
# Create a for loop that iterates through the first 10 ids of the dataset using the .take(n) function.
# The loop variable is named char_id (not ids) so that it does not overwrite the
# ids tensor created in the Vectorizing section.
for char_id in ids_dataset.take(10):
    print(chars_from_ids(char_id).numpy().decode('utf-8'))

N
o
-
B
a
k
e
 
N
u


## Divide Text into Sequences


In [None]:
# Create a variable called seq_length and set the value to the length of your sequences, 100.
seq_length = 100

# Number of complete (seq_length+1)-character sequences that fit in the text.
examples_per_epoch = len(text)//(seq_length+1)

# Create a variable called sequences and use the batch function to divide the data into sequences.
# Each sequence holds seq_length+1 ids so that it can later be split into an
# input and a target of seq_length ids each. drop_remainder=True discards a
# final sequence that would be shorter than seq_length+1.
sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)

# Create a for loop, and use the .take() function to take 1 sequence from sequences.
for seq in sequences.take(1):
    print(chars_from_ids(seq))

tf.Tensor(
[b'N' b'o' b'-' b'B' b'a' b'k' b'e' b' ' b'N' b'u' b't' b' ' b'C' b'o'
 b'o' b'k' b'i' b'e' b's' b'\n' b'1' b' ' b'c' b'.' b' ' b'f' b'i' b'r'
 b'm' b'l' b'y' b' ' b'p' b'a' b'c' b'k' b'e' b'd' b' ' b'b' b'r' b'o'
 b'w' b'n' b' ' b's' b'u' b'g' b'a' b'r' b'\n' b' ' b'1' b'/' b'2' b' '
 b'c' b'.' b' ' b'e' b'v' b'a' b'p' b'o' b'r' b'a' b't' b'e' b'd' b' '
 b'm' b'i' b'l' b'k' b'\n' b' ' b'1' b'/' b'2' b' ' b't' b's' b'p' b'.'
 b' ' b'v' b'a' b'n' b'i' b'l' b'l' b'a' b'\n' b' ' b'1' b'/' b'2' b' '
 b'c' b'.' b' '], shape=(101,), dtype=string)


In [None]:
# Create a for loop using the .take() function to take 5 sequences.
for seq in sequences.take(5):
    print(text_from_ids(seq))

tf.Tensor(b'No-Bake Nut Cookies\n1 c. firmly packed brown sugar\n 1/2 c. evaporated milk\n 1/2 tsp. vanilla\n 1/2 c. ', shape=(), dtype=string)
tf.Tensor(b'broken nuts (pecans)\n 2 Tbsp. butter or margarine\n 3 1/2 c. bite size shredded rice biscuitsIn a heav', shape=(), dtype=string)
tf.Tensor(b'y 2-quart saucepan, mix brown sugar, nuts, evaporated milk and butter or margarine.\n Stir over medium', shape=(), dtype=string)
tf.Tensor(b' heat until mixture bubbles all over top.\n Boil and stir 5 minutes more. Take off heat.\n Stir in vani', shape=(), dtype=string)
tf.Tensor(b'lla and cereal; mix well.\n Using 2 teaspoons, drop and shape into 30 clusters on wax paper.\n Let stan', shape=(), dtype=string)


## Create Input and Target Pairs


In [None]:
# Create a function called split_input_target that will split a sequence into input text and target text.
def split_input_target(sequence):
    """Split a sequence into an (input, target) pair offset by one step.

    The input is every element except the last and the target is every
    element except the first, so target[i] is the element that follows
    input[i] in the original sequence.
    """
    shifted_target = sequence[1:]
    model_input = sequence[:-1]
    return model_input, shifted_target

In [None]:
# Create a variable called example_text and add some sample text.
example_text = "i love idtech"

# Print the split input and target lists.
print(split_input_target(list(example_text)))

(['i', ' ', 'l', 'o', 'v', 'e', ' ', 'i', 'd', 't', 'e', 'c'], [' ', 'l', 'o', 'v', 'e', ' ', 'i', 'd', 't', 'e', 'c', 'h'])


In [None]:
# Use the split_input_target to create a dataset of the input and target text for each sequence.
dataset = sequences.map(split_input_target)

In [None]:
# Create a for loop that iterates through the dataset to get 1 input example and target example.
for input_example, target_example in dataset.take(1):
    print("Input :", text_from_ids(input_example).numpy())
    print("Target:", text_from_ids(target_example).numpy())

Input : b'No-Bake Nut Cookies\n1 c. firmly packed brown sugar\n 1/2 c. evaporated milk\n 1/2 tsp. vanilla\n 1/2 c.'
Target: b'o-Bake Nut Cookies\n1 c. firmly packed brown sugar\n 1/2 c. evaporated milk\n 1/2 tsp. vanilla\n 1/2 c. '


# Final Touches


In [None]:
# Set a batch size to 64 so that the data can be entered into the network.
BATCH_SIZE = 64

# Set a buffer size to 10000 so that you can shuffle the dataset without using too much memory.
# The dataset is shuffled through a buffer of this many sequences.
BUFFER_SIZE = 10000

# Prepare your dataset to train the network: shuffle the sequences, group them
# into batches of BATCH_SIZE (dropping an incomplete final batch), and prefetch.
# NOTE: dataset is reassigned from itself, so re-running this cell on its own
# batches the already-batched dataset again; re-run the cell that creates
# dataset from sequences first.
dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE))

# Print the final prepared dataset.
print(dataset)

<_PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>


# Building Network


## Network Setup

In [None]:
# Create a variable called vocab_size and set the value as the length of the vocab.
# NOTE(review): the model in the Model Design section is built with
# len(ids_from_chars.get_vocabulary()) rather than this variable — confirm which is intended.
vocab_size = len(vocab)

# Create a variable called embedding_dim and set the value to 256.
embedding_dim = 256

# Create a variable called rnn_units and set the value to 1024 to get the network started.
rnn_units = 1024

## Model Breakdown


In [None]:
# Create a class called MyModel to define the model with two functions called init and call.
class MyModel(tf.keras.Model):
    """Character-level language model: Embedding -> GRU -> Dense.

    Args:
        vocab_size: Number of distinct token ids. Used as the embedding input
            size and as the number of output logits per position.
        embedding_dim: Size of the embedding vector for each id.
        rnn_units: Number of units in the GRU layer.
    """

    def __init__(self, vocab_size, embedding_dim, rnn_units):
        # Do not pass self explicitly: super().__init__() already binds it, and
        # the extra positional argument raises
        # "TypeError: Layer.__init__() takes 1 positional argument but 2 were given".
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(rnn_units,
                                       return_sequences=True,
                                       return_state=True)
        # No activation: the model outputs raw logits over the vocabulary.
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, states=None, return_state=False, training=False):
        """Run the model on a batch of id sequences.

        Args:
            inputs: Batch of token ids.
            states: GRU state to resume from, or None to start from the GRU's
                default initial state.
            return_state: If True, also return the final GRU state.
            training: Forwarded to each layer.

        Returns:
            The logits, or a (logits, states) tuple when return_state is True.
        """
        x = self.embedding(inputs, training=training)
        # When states is None the GRU creates its own default (zero) initial
        # state. This avoids calling get_initial_state directly, whose expected
        # argument differs between Keras versions.
        x, states = self.gru(x, initial_state=states, training=training)
        x = self.dense(x, training=training)

        if return_state:
            return x, states
        return x

## Model Design


In [None]:
# Create a model variable using the MyModel class.
model=MyModel(vocab_size=len(ids_from_chars.get_vocabulary()), embedding_dim=embedding_dim, rnn_units=rnn_units)

TypeError: Layer.__init__() takes 1 positional argument but 2 were given

In [None]:
# Run the model (not yet trained at this point) on one batch from the dataset and
# print the shape of its output, which is (batch_size, sequence_length, vocab_size).
# input_example_batch, target_example_batch and example_batch_predictions are
# reused by the cells that follow.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions=model(input_example_batch)
    print(example_batch_predictions.shape, " (batch_size, sequence_length, vocab_size)")

In [None]:
# Print the summary of the model.
model.summary()

## Random Example


In [None]:
# Sample one id per position from the predicted logits of the first sequence in the batch.
# tf.random.categorical draws from the distribution instead of always picking the largest logit.
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
# Drop the trailing num_samples axis and convert the result to a NumPy array.
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()


# Print out the input and next character predictions for the sample.
print("Input:\n", text_from_ids(input_example_batch[0]).numpy())
print("\nNext Char Predictions:\n", text_from_ids(sampled_indices).numpy())

# Training


In [None]:
# Set the loss function. from_logits=True because the model's last layer is a
# Dense layer with no activation, so its outputs are raw logits, not probabilities.
loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True)

# Get the mean loss of the example batch (targets vs. the model's predictions).
example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)


# Add a print statement to print the shape of example_batch_predictions.
print(example_batch_predictions.shape)
print("^ # (batch_size, sequence_length, vocab_size)")


# Add a print statement to print example_batch_mean_loss.
print(example_batch_mean_loss)


# Print the exponential of the average loss.
print("Exponential of average loss: ", tf.exp(example_batch_mean_loss).numpy())


In [None]:
# Set the network's optimizer and loss.
model.compile(optimizer='adam', loss=loss)


In [None]:
# Add some checkpoints to keep track of the training process.
checkpoint_dir = './training_checkpoints'
# One weights file is written per epoch; {epoch} is filled in by the callback.
# The ".weights.h5" suffix is required by Keras 3 when save_weights_only=True
# (without it ModelCheckpoint raises a ValueError).
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}.weights.h5")
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_prefix, save_weights_only=True)

In [None]:
# Set the number of epochs to 20.
epochs=20

In [None]:
# Add the fit function and set the input data for the model.
history=model.fit(dataset, epochs=epochs, callbacks=[checkpoint_callback])

# Generate Text


## Define a One Step Model & Generate One-Step Function

In [None]:
# Create a class called OneStep that uses the model to predict new text.
class OneStep(tf.keras.Model):
    """Wraps a model to generate text one character at a time.

    Args:
        model: Model called as model(inputs=..., states=..., return_state=True)
            and returning (logits, states).
        chars_from_ids: Layer that maps ids to characters.
        ids_from_chars: Layer that maps characters to ids.
        temperature: Value the logits are divided by before sampling.
    """

    def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
        super().__init__()
        self.temperature = temperature
        self.model = model
        self.chars_from_ids = chars_from_ids
        self.ids_from_chars = ids_from_chars

        # Build a mask that is -inf at the id of '[UNK]' and 0 everywhere else.
        skip_ids = self.ids_from_chars(['[UNK]'])[:, None]
        sparse_mask = tf.SparseTensor(
          values=[-float('inf')]*len(skip_ids),
          indices=skip_ids,
          dense_shape=[len(ids_from_chars.get_vocabulary())])

        self.prediction_mask = tf.sparse.to_dense(sparse_mask)

    @tf.function
    def generate_one_step(self, inputs, states=None):
        """Predict the next character for each input string.

        Args:
            inputs: Batch of strings to continue.
            states: Model state returned by the previous call, or None.

        Returns:
            A (predicted_chars, states) tuple: one sampled character per input
            string and the model state to pass to the next call.
        """
        # Convert the input strings into a dense batch of ids.
        input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
        input_ids = self.ids_from_chars(input_chars).to_tensor()

        predicted_logits, states = self.model(inputs=input_ids, states=states, return_state=True)
        # Only the prediction for the last position is used.
        predicted_logits = predicted_logits[:, -1, :]
        predicted_logits = predicted_logits/self.temperature
        # Apply the mask so that '[UNK]' gets a logit of -inf and is never sampled.
        predicted_logits = predicted_logits + self.prediction_mask

        # Sample one id per input from the masked logits and map it to a character.
        predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
        predicted_ids = tf.squeeze(predicted_ids, axis=-1)
        predicted_chars = self.chars_from_ids(predicted_ids)

        return predicted_chars, states

## Model Setup


In [None]:
# Generate a "one step" model using the OneStep class.
one_step_model=OneStep(model, chars_from_ids, ids_from_chars)

In [None]:
# Initialize the states to None.
states=None

In [None]:
# Create a variable called next_char and add the text to start the network.
next_char=tf.constant([" "])

In [None]:
# Create a list called result and add the next_char.
result= [next_char]

In [None]:
# Create a for loop that runs once for each character you want to generate (1000 is a good number to start with).
for n in range (1000):
    next_char, states =  one_step_model.generate_one_step(next_char, states=states)
    result.append(next_char)

In [None]:
# Take the results from the for loop and join it together to create a string.
result = tf.strings.join(result)

In [None]:
# Print the result to see the joined string.
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)

In [None]:
# Adjust your network below.


## Exporting Model


In [None]:
# Export your model.
tf.saved_model.save(one_step_model, 'one_step')


In [None]:
# Reload the model from the one_step folder.
data=tf.saved_model.load("one_step")