In [2]:
# Imports
import tensorflow as tf
import numpy as np
import os
import time

In [3]:
# Fetch the Shakespeare corpus; get_file hands back the path of the local copy.
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [5]:
# Read the raw bytes and decode them explicitly as UTF-8. Binary mode keeps the
# bytes exactly as stored (no newline translation), and the `with` block makes
# sure the file handle is closed instead of being left to the garbage collector.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
# length of text is the number of characters in it
print(f'Length of text: {len(text)} characters')
# Show the first 250 characters as a quick sanity check of the content.
print(text[:250])

Length of text: 1115394 characters
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [9]:
# Collect the distinct characters of the corpus in sorted order.
vocab = sorted(set(text))
print(len(vocab), 'unique characters')
print(vocab)

65 unique characters
['\n', ' ', '!', '$', '&', "'", ',', '-', '.', '3', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


# Processing the text

In [10]:
# Layer that maps characters to integer ids (no mask token is reserved).
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab),
    mask_token=None,
)

# Inverse layer that maps ids back to characters; it reuses the vocabulary of
# the forward layer so both directions agree on every id.
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(),
    invert=True,
    mask_token=None,
)

In [11]:
 # function that joins characters back into strings
def text_from_ids(ids):
  """Map token ids to characters and join them along the last axis."""
  chars = chars_from_ids(ids)
  return tf.strings.reduce_join(chars, axis=-1)

In [13]:
# Split the whole corpus into UTF-8 characters, look up each character's id,
# and materialise the result as a NumPy array.
all_ids = ids_from_chars(
    tf.strings.unicode_split(text, input_encoding='UTF-8')
).numpy()
all_ids

array([19, 48, 57, ..., 46,  9,  1], dtype=int64)

In [17]:
# One dataset element per character id.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)
# Peek at the first ten ids, decoded back to characters.
for ids in ids_dataset.take(10):
    char_bytes = chars_from_ids(ids).numpy()
    print(char_bytes.decode('utf-8'))

F
i
r
s
t
 
C
i
t
i


In [20]:
seq_length = 100

# Group the id stream into chunks of seq_length + 1 ids; the extra id lets each
# chunk be split later into an input and a target shifted by one position.
# A trailing chunk that is too short is dropped.
sequences = ids_dataset.batch(batch_size=seq_length + 1, drop_remainder=True)

# Show the first chunk both as individual characters and as joined text.
for seq in sequences.take(1):
  print(chars_from_ids(seq))
  print(text_from_ids(seq).numpy())

tf.Tensor(
[b'F' b'i' b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':'
 b'\n' b'B' b'e' b'f' b'o' b'r' b'e' b' ' b'w' b'e' b' ' b'p' b'r' b'o'
 b'c' b'e' b'e' b'd' b' ' b'a' b'n' b'y' b' ' b'f' b'u' b'r' b't' b'h'
 b'e' b'r' b',' b' ' b'h' b'e' b'a' b'r' b' ' b'm' b'e' b' ' b's' b'p'
 b'e' b'a' b'k' b'.' b'\n' b'\n' b'A' b'l' b'l' b':' b'\n' b'S' b'p' b'e'
 b'a' b'k' b',' b' ' b's' b'p' b'e' b'a' b'k' b'.' b'\n' b'\n' b'F' b'i'
 b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':' b'\n' b'Y'
 b'o' b'u' b' '], shape=(101,), dtype=string)
b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [22]:
def split_input_target(sequence):
    """Return (input, target) for a sequence.

    The input is the sequence without its last element and the target is the
    sequence without its first element, so target[i] follows input[i].
    """
    return sequence[:-1], sequence[1:]

# Turn every chunk into an (input, target) pair.
dataset = sequences.map(map_func=split_input_target)

In [23]:
# Batch size
BATCH_SIZE = 64

# Number of elements held in the shuffle buffer.
BUFFER_SIZE = 10000

# Build the training pipeline directly from `sequences` instead of from the
# previous value of `dataset`: re-running this cell then always yields the same
# pipeline rather than batching an already batched dataset a second time.
# tf.data.AUTOTUNE replaces the deprecated tf.data.experimental.AUTOTUNE alias.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.AUTOTUNE))

dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

# Create RNN Model 


In [25]:
# Vocabulary size as reported by the StringLookup layer.
vocab_size = len(ids_from_chars.get_vocabulary())
print(vocab_size)

# Model hyperparameters: embedding dimension and number of RNN units.
embedding_dim, rnn_units = 256, 1024

66


In [34]:
# No sequence length needs to be specified: the model accepts input sequences of any length.

class MyModel(tf.keras.Model):
  """Character-level model: Embedding -> GRU -> Dense over the vocabulary."""

  def __init__(self, vocab_size, embedding_dim, rnn_units):
    """Create the three layers.

    Args:
      vocab_size: number of distinct token ids; used as the embedding input
        size and as the width of the final Dense layer.
      embedding_dim: size of each embedding vector.
      rnn_units: number of units in the GRU layer.
    """
    # `self` is already bound by super(); passing it again would hand the
    # instance to tf.keras.Model.__init__ as an extra positional argument.
    super().__init__()
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    # return_sequences gives an output per timestep; return_state exposes the
    # final GRU state so callers can carry it across calls.
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    """Run the model on a batch of token ids.

    Args:
      inputs: batch of token-id sequences.
      states: optional GRU state to start from; when None the GRU's own
        initial state is used.
      return_state: when True, also return the GRU state after the last step.
      training: forwarded to every layer.

    Returns:
      The Dense layer's output for every timestep, or a tuple
      (output, states) when `return_state` is True.
    """
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x

In [35]:
# Instantiate the model with the hyperparameters chosen earlier.
model = MyModel(vocab_size, embedding_dim, rnn_units)

In [62]:
# Run the model on a single batch to check the shape of its output.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)

print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")
print(example_batch_predictions[0])

# Predictions at the final timestep of every sequence in the batch.
example_batch_predictions[:, -1, :]

(64, 100, 66) # (batch_size, sequence_length, vocab_size)
tf.Tensor(
[[-1.9334422  -0.3488962   1.4828081  ... -0.8846549   1.5826918
  -1.1738349 ]
 [-2.3274357  -1.9890401  -0.5178334  ... -0.59782416  0.8016198
   0.74277264]
 [-4.602709    1.8313886   3.973525   ... -1.9603542   1.1835002
  -1.73887   ]
 ...
 [-5.6212564  -0.48659518  1.4783057  ... -2.4719584   1.0856739
  -0.26215833]
 [-5.706069    2.0034118   4.6389537  ... -3.4942462   0.5997738
  -2.5121825 ]
 [-7.070782    2.3340266   5.3439507  ... -4.03253     1.0826193
  -3.774559  ]], shape=(100, 66), dtype=float32)
tf.Tensor(
[[-7.0707822e+00  2.3340266e+00  5.3439507e+00 ... -4.0325298e+00
   1.0826193e+00 -3.7745590e+00]
 [-5.8076353e+00 -1.4261122e+00  3.2631416e+00 ... -1.3284305e+00
   3.6629736e+00 -3.4907199e-03]
 [-4.3919621e+00 -3.5034957e+00  1.2084309e-01 ... -4.9032840e-01
   2.1410909e+00  5.5546522e-01]
 ...
 [-4.3845778e+00 -3.6106577e+00 -3.9978757e+00 ... -3.4689653e+00
   1.6002932e+00 -2.7677622e+00]


In [37]:
# Print the model's layers together with their parameter counts.
model.summary()

Model: "my_model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     multiple                  16896     
                                                                 
 gru_3 (GRU)                 multiple                  3938304   
                                                                 
 dense_3 (Dense)             multiple                  67650     
                                                                 
Total params: 4,022,850
Trainable params: 4,022,850
Non-trainable params: 0
_________________________________________________________________


In [39]:
# Draw one id per timestep from the categorical distribution defined by the
# logits of the first example in the batch (this samples; it is not an argmax),
# then drop the trailing sample axis and convert to NumPy.
sampled_indices = tf.squeeze(
    tf.random.categorical(example_batch_predictions[0], num_samples=1),
    axis=-1,
).numpy()
print(sampled_indices)

[65  4 63 52 50 39 13 16 44 28 44  5 15 41 31 10 16 56 35 17 25  4 19 51
  2 48 31 43 20 43 21  1 33  9 44 48 65 21  3 59 17 45 52 10  0  4 15 40
 28 19 23 58 27 38 59 13  2 32 45 17 16 39 62 57  0 49 20 10 15 57 13 21
 52 36 36 12 28 20 16  7 28 35 31 37 39 24 24 38 47 10 46 46 50 57 39 28
 41 58 55 28]


In [40]:
# Decode the first input sequence of the example batch and the ids sampled for
# it back into text so the two can be compared by eye.
print("Input:\n", text_from_ids(input_example_batch[0]).numpy())
print("Next Char Predictions:\n", text_from_ids(sampled_indices).numpy())

Input:
 b'ou within the compass of my curse.\n\nBUCKINGHAM:\nNor no one here; for curses never pass\nThe lips of t'
Next Char Predictions:
 b'z$xmkZ?CeOe&BbR3CqVDL$Fl iRdGdH\nT.eizH!tDfm3[UNK]$BaOFJsNYt? SfDCZwr[UNK]jG3Br?HmWW;OGC,OVRXZKKYh3ggkrZObspO'


# Training Model

In [41]:
# The model's Dense layer has no activation, so the loss is told it receives logits.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [42]:
# Evaluate the loss on the example batch.
example_batch_mean_loss = loss(
    y_true=target_example_batch,
    y_pred=example_batch_predictions,
)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss)

Prediction shape:  (64, 100, 66)  # (batch_size, sequence_length, vocab_size)
Mean loss:         tf.Tensor(4.1903133, shape=(), dtype=float32)


In [43]:
# Exponential of the mean loss computed above.
# NOTE(review): for a freshly initialised model this is expected to be close to
# the vocabulary size — confirm against the printed vocab_size.
tf.exp(example_batch_mean_loss).numpy()

66.04348

In [44]:
# Configure training: Adam optimizer (selected by name) and the loss object defined earlier.
model.compile(optimizer='adam', loss=loss)

In [45]:
# Checkpoints go under this directory; "{epoch}" in the file prefix is a
# placeholder left for the callback to fill in.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Callback that writes the model weights (not the full model) during training.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_prefix,
    save_weights_only=True,
)

In [46]:
# Number of epochs passed to model.fit.
EPOCHS = 1

In [47]:
# Train on the batched dataset, writing a checkpoint through the callback.
history = model.fit(
    x=dataset,
    epochs=EPOCHS,
    callbacks=[checkpoint_callback],
)



In [53]:
# Look up the id of the "[UNK]" token once and reuse it, instead of running the
# same lookup three times.
unk_ids = ids_from_chars(['[UNK]'])
print(unk_ids)
# Add a trailing axis so each id becomes a one-element index row.
skip_ids = unk_ids[:, None]
print(skip_ids)

tf.Tensor([0], shape=(1,), dtype=int64)
tf.Tensor([[0]], shape=(1, 1), dtype=int64)


In [59]:
class OneStep(tf.keras.Model):
  """Wraps a model and the two lookup layers to sample one character per call."""

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    """Store the collaborators and precompute the "[UNK]" mask.

    Args:
      model: model called with `inputs`, `states` and `return_state=True`.
      chars_from_ids: layer mapping token ids to characters.
      ids_from_chars: layer mapping characters to token ids.
      temperature: divisor applied to the logits before sampling.
    """
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Additive mask over the vocabulary with -inf at the "[UNK]" id, so that
    # token can never be sampled.
    unk_indices = self.ids_from_chars(['[UNK]'])[:, None]
    vocab_len = len(ids_from_chars.get_vocabulary())
    unk_mask = tf.SparseTensor(
        indices=unk_indices,
        values=[float('-inf')] * len(unk_indices),
        # One entry per vocabulary item.
        dense_shape=[vocab_len])
    self.prediction_mask = tf.sparse.to_dense(unk_mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    """Sample the next character for each input string.

    Args:
      inputs: batch of strings to condition on.
      states: model state from the previous call, or None to start fresh.

    Returns:
      A tuple (predicted_chars, states): the sampled characters and the
      model state to pass to the next call.
    """
    # Strings -> UTF-8 characters -> dense tensor of token ids.
    chars = tf.strings.unicode_split(inputs, 'UTF-8')
    token_ids = self.ids_from_chars(chars).to_tensor()

    # logits has shape [batch, char, next_char_logits].
    logits, states = self.model(inputs=token_ids, states=states,
                                return_state=True)

    # Keep the last position only, scale by temperature, then mask "[UNK]".
    last_logits = logits[:, -1, :] / self.temperature
    last_logits = last_logits + self.prediction_mask

    # Sample one token id per batch row and drop the sample axis.
    sampled = tf.random.categorical(last_logits, num_samples=1)
    next_ids = tf.squeeze(sampled, axis=-1)

    # Map the ids back to characters and hand back the state as well.
    return self.chars_from_ids(next_ids), states

In [60]:
# Wrap the trained model with the lookup layers for character-by-character generation.
one_step_model = OneStep(
    model=model,
    chars_from_ids=chars_from_ids,
    ids_from_chars=ids_from_chars,
)

In [61]:
# Generate 1000 characters seeded with "ROMEO:" and time the whole run.
start = time.time()
states, next_char = None, tf.constant(['ROMEO:'])
result = [next_char]

for n in range(1000):
  # Feed the most recent character and the carried state back into the model.
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Concatenate the seed and every generated character into one string tensor.
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + 80 * '_')
print('\nRun time:', end - start)

ROMEO:
Sow ile ars aling fion, Ald inie de for,sesses toour
Amun', wok' tik noscee.
Peare. Wher, thay, lot hiontly geallim;
Tors hithes ou nager hat
Th bavo, not
Aly soo telis foubnous bfiy son,
Beve profne an if l,
Bury heapedod has, wa menge you wor cickain, for stimeze, sour dith:
Thath of the mor.

PORLISAE:
Wite sesar,e?

Pod apang of yonqust moide
Maftunn
whlond iod me't my hat meds wime?

BQCCELTUNFO:
An to dom?

MENGE:
Mamvang of he lige.

ThRYLIRCEN:
I Tocd, Io gorur,.

RUBERTOCDO:
I aigh
Ce marche of att ie hend abrveave har of lusd hous on to gor it menge rot to dow lat meancho,
West ald chead an int Room buinstis neve lawes: do for lot's, ica she ane refo
tate pricithe', whing out co mary thy byous thows ape keroo angu; wo lesan'g,
Trets:
The beere
Thath the wist of youter hay orbus
Jur, wive hove,
Yy, Goknd com mind shour:
Or shard thocisin then tom of tod irign's hisge.

ABMIRY:
Fith beaving.

PmIA,
tho Whake:
Nopdange,
Af wir.

CINHINI HINI:
Aped at thge tis upelts.

HON

In [63]:
# Export the generator as a SavedModel and load it straight back.
tf.saved_model.save(one_step_model, export_dir='one_step')
one_step_reloaded = tf.saved_model.load(export_dir='one_step')





INFO:tensorflow:Assets written to: one_step\assets


INFO:tensorflow:Assets written to: one_step\assets


In [64]:
# Generate 100 characters with the reloaded model, seeded with "ROMEO:".
states, next_char = None, tf.constant(['ROMEO:'])
result = [next_char]

for n in range(100):
  # Feed the most recent character and the carried state back into the model.
  next_char, states = one_step_reloaded.generate_one_step(next_char, states=states)
  result.append(next_char)

# Join the seed and the generated characters, then decode for display.
print(tf.strings.join(result)[0].numpy().decode("utf-8"))

ROMEO:
Hes pe om uld foth maca.

TANDs Evy, Oadthed frougin myour the aud thot in suomst. IBant thesk you 
