In [71]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

In [72]:
shakespeare_url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
filepath = keras.utils.get_file("shakespeare.txt", shakespeare_url)
with open(filepath) as f:
    shakespeare_text = f.read()

In [73]:
print(shakespeare_text[:200])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you


In [74]:
# Tokenize the full text so that each unique character will have a different character ID

tokenizer = keras.preprocessing.text.Tokenizer(char_level=True)
tokenizer.fit_on_texts(shakespeare_text)

In [75]:
# See one example from text to sequence
tokenizer.texts_to_sequences(["First"])

[[20, 6, 9, 8, 3]]

In [76]:
# One example from sequence (numbers) to text
tokenizer.sequences_to_texts([[2, 3, 4, 5,6,7,8,9]])

['e t o a i h s r']

In [77]:
# Total number of unique characters
max_id = len(tokenizer.word_index)
max_id

39

In [78]:
# All characters and their corresponding character ID
tokenizer.word_index

{'\n': 11,
 ' ': 1,
 '!': 31,
 '$': 39,
 '&': 38,
 "'": 28,
 ',': 18,
 '-': 32,
 '.': 27,
 '3': 37,
 ':': 24,
 ';': 29,
 '?': 30,
 'a': 5,
 'b': 22,
 'c': 19,
 'd': 13,
 'e': 2,
 'f': 20,
 'g': 21,
 'h': 7,
 'i': 6,
 'j': 33,
 'k': 25,
 'l': 12,
 'm': 15,
 'n': 10,
 'o': 4,
 'p': 23,
 'q': 34,
 'r': 9,
 's': 8,
 't': 3,
 'u': 14,
 'v': 26,
 'w': 17,
 'x': 35,
 'y': 16,
 'z': 36}

In [79]:
# Total number of characters (all of then, not only the unique ones)
dataset_size = tokenizer.document_count 
dataset_size

1115394

In [84]:
[encoded] = np.array(tokenizer.texts_to_sequences([shakespeare_text])) - 1

In [85]:
train_size = dataset_size * 90 // 100
train_size

1003854

In [86]:
dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])
dataset

<TensorSliceDataset shapes: (), types: tf.int64>

In [87]:
# Full dataset before windowing
list(dataset.take(20))

[<tf.Tensor: shape=(), dtype=int64, numpy=19>,
 <tf.Tensor: shape=(), dtype=int64, numpy=5>,
 <tf.Tensor: shape=(), dtype=int64, numpy=8>,
 <tf.Tensor: shape=(), dtype=int64, numpy=7>,
 <tf.Tensor: shape=(), dtype=int64, numpy=2>,
 <tf.Tensor: shape=(), dtype=int64, numpy=0>,
 <tf.Tensor: shape=(), dtype=int64, numpy=18>,
 <tf.Tensor: shape=(), dtype=int64, numpy=5>,
 <tf.Tensor: shape=(), dtype=int64, numpy=2>,
 <tf.Tensor: shape=(), dtype=int64, numpy=5>,
 <tf.Tensor: shape=(), dtype=int64, numpy=35>,
 <tf.Tensor: shape=(), dtype=int64, numpy=1>,
 <tf.Tensor: shape=(), dtype=int64, numpy=9>,
 <tf.Tensor: shape=(), dtype=int64, numpy=23>,
 <tf.Tensor: shape=(), dtype=int64, numpy=10>,
 <tf.Tensor: shape=(), dtype=int64, numpy=21>,
 <tf.Tensor: shape=(), dtype=int64, numpy=1>,
 <tf.Tensor: shape=(), dtype=int64, numpy=19>,
 <tf.Tensor: shape=(), dtype=int64, numpy=3>,
 <tf.Tensor: shape=(), dtype=int64, numpy=8>]

In [88]:
n_steps = 100
window_length = n_steps + 1
dataset = dataset.window(window_length, shift=1, drop_remainder=True)

In [89]:
# The dataset after windowing
list(dataset.take(5))

[<_VariantDataset shapes: (), types: tf.int64>,
 <_VariantDataset shapes: (), types: tf.int64>,
 <_VariantDataset shapes: (), types: tf.int64>,
 <_VariantDataset shapes: (), types: tf.int64>,
 <_VariantDataset shapes: (), types: tf.int64>]

In [90]:
# Convert the nested dataset into a flat dataset
dataset = dataset.flat_map(lambda window: window.batch(window_length))

In [91]:
list(dataset.take(2))

[<tf.Tensor: shape=(101,), dtype=int64, numpy=
 array([19,  5,  8,  7,  2,  0, 18,  5,  2,  5, 35,  1,  9, 23, 10, 21,  1,
        19,  3,  8,  1,  0, 16,  1,  0, 22,  8,  3, 18,  1,  1, 12,  0,  4,
         9, 15,  0, 19, 13,  8,  2,  6,  1,  8, 17,  0,  6,  1,  4,  8,  0,
        14,  1,  0,  7, 22,  1,  4, 24, 26, 10, 10,  4, 11, 11, 23, 10,  7,
        22,  1,  4, 24, 17,  0,  7, 22,  1,  4, 24, 26, 10, 10, 19,  5,  8,
         7,  2,  0, 18,  5,  2,  5, 35,  1,  9, 23, 10, 15,  3, 13,  0])>,
 <tf.Tensor: shape=(101,), dtype=int64, numpy=
 array([ 5,  8,  7,  2,  0, 18,  5,  2,  5, 35,  1,  9, 23, 10, 21,  1, 19,
         3,  8,  1,  0, 16,  1,  0, 22,  8,  3, 18,  1,  1, 12,  0,  4,  9,
        15,  0, 19, 13,  8,  2,  6,  1,  8, 17,  0,  6,  1,  4,  8,  0, 14,
         1,  0,  7, 22,  1,  4, 24, 26, 10, 10,  4, 11, 11, 23, 10,  7, 22,
         1,  4, 24, 17,  0,  7, 22,  1,  4, 24, 26, 10, 10, 19,  5,  8,  7,
         2,  0, 18,  5,  2,  5, 35,  1,  9, 23, 10, 15,  3, 13,  0,  4]

In [92]:
np.random.seed(42)
tf.random.set_seed(42)

In [93]:
# Shuffle the dataset in batch sizes and then label them as inouts and labels
batch_size = 32
dataset = dataset.shuffle(10000).batch(batch_size)
dataset = dataset.map(lambda windows: (windows[:, :-1], windows[:,1:]))

In [94]:
list(dataset.take(1))

[(<tf.Tensor: shape=(32, 100), dtype=int64, numpy=
  array([[ 6,  5,  7, ...,  1,  0, 18],
         [ 6,  1,  0, ...,  1,  0, 11],
         [11, 12,  0, ...,  9, 29, 10],
         ...,
         [ 3, 27,  0, ..., 10,  7,  3],
         [18,  3,  9, ...,  5,  4,  9],
         [14,  7,  0, ...,  4,  8,  1]])>,
  <tf.Tensor: shape=(32, 100), dtype=int64, numpy=
  array([[ 5,  7,  0, ...,  0, 18,  4],
         [ 1,  0, 24, ...,  0, 11,  1],
         [12,  0, 21, ..., 29, 10, 10],
         ...,
         [27,  0,  2, ...,  7,  3, 19],
         [ 3,  9, 12, ...,  4,  9,  7],
         [ 7,  0,  3, ...,  8,  1,  0]])>)]

In [95]:
# Encode each character using one-hot-encoding
dataset = dataset.map(
    lambda X_batch, Y_batch: (tf.one_hot(X_batch, depth=max_id), Y_batch))

In [96]:
dataset = dataset.prefetch(1)

In [100]:
for X_batch, Y_batch in dataset.take(1):
    print(X_batch.shape, Y_batch.shape)

(32, 100, 39) (32, 100)


# Creating and Training the Model

In [107]:
# Create the model using two GRU cells
model = keras.models.Sequential([
  keras.layers.GRU(128, return_sequences=True, input_shape=[None, max_id], dropout=0.2, recurrent_dropout=0.2),
  keras.layers.GRU(128, return_sequences=True, dropout=0.2, recurrent_dropout=0.2),
  keras.layers.TimeDistributed(keras.layers.Dense(max_id, activation='softmax'))
])



In [109]:
# Compile and train the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
history = model.fit(dataset, steps_per_epoch=train_size // batch_size, epochs=10)

Epoch 1/10
 1479/31370 [>.............................] - ETA: 7:32:11 - loss: 1.8591

KeyboardInterrupt: ignored