Gated recurrent unit example
Cho, Kyunghyun; van Merrienboer, Bart; Gulcehre, Caglar; Bahdanau, Dzmitry; Bougares, Fethi; Schwenk, Holger; Bengio, Yoshua (2014). "Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation"
Neural Network Embeddings Explained
https://towardsdatascience.com/neural-network-embeddings-explained-4d028e6f0526

In [1]:
import os
import tensorflow as tf
import numpy as np

In [2]:
# Fetch the Shakespeare corpus and keep the path of the local copy.
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)

In [3]:
# Read the raw bytes and decode them explicitly as UTF-8. The context manager
# closes the file handle, which a bare open(...).read() chain leaves open.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')

In [4]:
# Report the corpus size as the number of decoded characters.
print('length of text {} characters'.format(len(text)))

length of text 1115394 characters


In [5]:
# Peek at the first 250 characters of the corpus.
text[:250]

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you know Caius Marcius is chief enemy to the people.\n'

In [6]:
# Vocabulary: the distinct characters of the corpus, in sorted order.
vocab = sorted(set(text))

In [7]:
# Report how many distinct characters the vocabulary holds.
print('{} unique characters'.format(len(vocab)))

65 unique characters


In [8]:
# Character -> integer index; each character maps to its position in `vocab`.
char2idx = dict(zip(vocab, range(len(vocab))))

In [9]:
# Inverse lookup: position i holds the character that char2idx maps to i.
# Stored as a NumPy array so it can be indexed with arrays of ids.
idx2char = np.array(vocab)

In [10]:
# Encode the whole corpus as an array of vocabulary indices, one per character.
text_as_int = np.array([char2idx[char] for char in text])

In [11]:
# Preview the first 20 entries of the character -> index table.
# Slicing the key list replaces the zip(char2idx, range(20)) idiom, whose
# throwaway `_` target, bound at cell level, shadows IPython's `_`
# (last output) variable for the rest of the session.
print('{')
for char in list(char2idx)[:20]:
    print('    {:4s}: {:3d}'.format(repr(char), char2idx[char]))
print('  ...\n')

{
    '\n':   0
    ' ' :   1
    '!' :   2
    '$' :   3
    '&' :   4
    "'" :   5
    ',' :   6
    '-' :   7
    '.' :   8
    '3' :   9
    ':' :  10
    ';' :  11
    '?' :  12
    'A' :  13
    'B' :  14
    'C' :  15
    'D' :  16
    'E' :  17
    'F' :  18
    'G' :  19
  ...



In [12]:
# Show the first 13 characters next to their integer encoding.
print('{} ----> characters mapped to int ----> {}'.format(repr(text[:13]), text_as_int[:13]))

'First Citizen' ----> characters mapped to int ----> [18 47 56 57 58  1 15 47 58 47 64 43 52]


In [13]:
# Number of input characters per training example. Chunks are cut with
# seq_length + 1 characters so input and target can be offset by one.
seq_length = 100
# Number of complete (seq_length + 1)-character chunks in the corpus.
examples_per_epoch = len(text) // (seq_length + 1)

In [14]:
# Dataset that yields the encoded corpus one character index at a time.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

In [15]:
# for i in char_dataset.take(5):
#     print(idx2char[i.numpy()])

# F
# i
# r
# s
# t

In [16]:
# Group consecutive characters into chunks of seq_length + 1;
# drop_remainder=True discards a final chunk that would be shorter.
sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)

In [17]:
# Decode and print the first five chunks to check the chunking.
for item in sequences.take(5):
    print(repr(''.join(idx2char[item.numpy()])))

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


In [18]:
def split_input_target(chunk):
    """Turn one chunk into an (input, target) pair shifted by one position.

    The input is the chunk without its last element; the target is the
    chunk without its first element, so target[i] follows input[i].
    """
    return chunk[:-1], chunk[1:]

In [19]:
# Turn every chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)

In [20]:
# Decode the first (input, target) pair to confirm the one-character shift.
# input_example / target_example stay bound after the loop and are reused
# by the step-by-step preview cell below.
for input_example, target_example in dataset.take(1):
    print('input_data', repr(''.join(idx2char[input_example.numpy()])))
    print('target_data', repr(''.join(idx2char[target_example.numpy()])))

input_data 'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
target_data 'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [21]:
# Walk through the first five positions of the example pair, showing for each
# step the input id/character and the id/character expected as output.
for i, (input_idx, target_idx) in enumerate(zip(input_example[:5], target_example[:5])):
    print('step {:4d}'.format(i))
    print('   input {} ({:s})'.format(input_idx, repr(idx2char[input_idx])))
    print('   expected output {} ({:s})'.format(target_idx, repr(idx2char[target_idx])))
    
    

step    0
   input 18 ('F')
   expected output 47 ('i')
step    1
   input 47 ('i')
   expected output 56 ('r')
step    2
   input 56 ('r')
   expected output 57 ('s')
step    3
   input 57 ('s')
   expected output 58 ('t')
step    4
   input 58 ('t')
   expected output 1 (' ')


In [22]:
# Size of the shuffle buffer passed to Dataset.shuffle.
BUFFER_SIZE = 10000
# Number of (input, target) pairs per training batch; also the fixed batch
# size the training model is built with.
BATCH_SIZE = 64

In [23]:
# Shuffle the (input, target) pairs within a BUFFER_SIZE window, then group
# them into batches of exactly BATCH_SIZE (a short final batch is dropped).
# The pipeline is rebuilt from `sequences` rather than from the previous value
# of `dataset`, so re-running this cell does not stack a second shuffle/batch
# on top of an already batched dataset.
dataset = (
    sequences.map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

In [24]:
# Model hyperparameters.
# Number of distinct characters in the vocabulary.
vocab_size = len(vocab)
# Width of the embedding vector learned for each character.
embedding_dim = 256
# Number of units in the GRU layer.
rnn_units = 1024

In [25]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the character model: Embedding -> stateful GRU -> Dense.

    Args:
        vocab_size: number of distinct character ids; used as the embedding
            input size and as the width of the final Dense layer.
        embedding_dim: size of each character embedding vector.
        rnn_units: number of units in the GRU layer.
        batch_size: fixed batch dimension baked into the input shape
            (the sequence dimension is left as None).

    Returns:
        An uncompiled tf.keras.Sequential. The final Dense layer has no
        activation and emits vocab_size values per timestep.
    """
    embedding = tf.keras.layers.Embedding(
        vocab_size, embedding_dim, batch_input_shape=[batch_size, None])
    recurrent = tf.keras.layers.GRU(
        rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform')
    projection = tf.keras.layers.Dense(vocab_size)

    return tf.keras.Sequential([embedding, recurrent, projection])

In [26]:
# Training model: built with the fixed training batch size BATCH_SIZE.
model = build_model(vocab_size=len(vocab),
                    embedding_dim=embedding_dim,
                    rnn_units=rnn_units,
                    batch_size=BATCH_SIZE)

In [27]:
# Sanity check: run one batch through the not-yet-trained model and print the
# shape of its output.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, '# (batch_size, seq_length, vocab_size)')

(64, 100, 65) # (batch_size, seq_length, vocab_size)


In [28]:
# Show the layers, output shapes and parameter counts of the training model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           16640     
_________________________________________________________________
gru (GRU)                    (64, None, 1024)          3938304   
_________________________________________________________________
dense (Dense)                (64, None, 65)            66625     
Total params: 4,021,569
Trainable params: 4,021,569
Non-trainable params: 0
_________________________________________________________________


In [29]:
def loss(labels, logits):
    """Sparse categorical cross-entropy computed on raw (unnormalized) logits.

    Args:
        labels: integer class ids.
        logits: model outputs before any softmax.

    Returns:
        Whatever tf.keras.losses.sparse_categorical_crossentropy returns for
        these inputs with from_logits=True.
    """
    return tf.keras.losses.sparse_categorical_crossentropy(
        y_true=labels, y_pred=logits, from_logits=True)

In [30]:
# Configure training: Adam optimizer with the logits-based loss defined above.
model.compile(optimizer='adam', loss=loss)

In [31]:
# Directory that receives the training checkpoints.
checkpoint_dir = './training_checkpoints'
# Checkpoint path template; the '{epoch}' placeholder is expected to be filled
# in by ModelCheckpoint when it saves.
checkpoint_prefix = os.path.join(checkpoint_dir, 'chkpt_{epoch}')
# Callback that saves only the model weights (not the full model).
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True
)

In [32]:
# Number of passes over the training dataset.
EPOCHS = 25

In [33]:
# Training is expensive, so it runs only when checkpoint_dir holds no
# checkpoint yet. On a fresh kernel this makes Restart & Run All produce the
# weights that the restore cell further down loads; on later runs the existing
# checkpoints are reused. Delete the checkpoint directory to force retraining.
if tf.train.latest_checkpoint(checkpoint_dir) is None:
    history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Train for 172 steps
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [34]:
# Rebuild the same architecture with a batch size of 1 for text generation.
# This rebinds `model`; the training model is no longer reachable by that name.
model = build_model(vocab_size=len(vocab),
                    embedding_dim=embedding_dim,
                    rnn_units=rnn_units,
                    batch_size=1)

In [35]:
# tf.train.latest_checkpoint returns None when checkpoint_dir contains no
# checkpoint (e.g. training was never run on this machine). Fail with an
# explicit message instead of handing None to load_weights.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
    raise FileNotFoundError(
        'No checkpoint found in {!r}; run the training cell first.'.format(checkpoint_dir))
model.load_weights(latest_checkpoint)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f61f4275be0>

In [36]:
# Build the model for input of batch size 1 and unspecified sequence length.
model.build(tf.TensorShape([1, None]))

In [37]:
# Show the layers and shapes of the batch-size-1 generation model.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 256)            16640     
_________________________________________________________________
gru_1 (GRU)                  (1, None, 1024)           3938304   
_________________________________________________________________
dense_1 (Dense)              (1, None, 65)             66625     
Total params: 4,021,569
Trainable params: 4,021,569
Non-trainable params: 0
_________________________________________________________________


In [38]:
def generate_text(model, start_string, num_generate=1000, temperature=1.0):
    """Sample text from the model one character at a time.

    The model is first fed the whole start string; after that each sampled
    character is fed back as the next input. This relies on the model keeping
    its recurrent state between calls (build_model creates a stateful GRU),
    which is why the state is reset before generation starts.

    Args:
        model: model built for batch size 1 that maps a [1, length] tensor of
            character ids to per-timestep logits over the vocabulary.
        start_string: non-empty seed text; every character must be a key of
            char2idx.
        num_generate: number of characters to sample (default 1000).
        temperature: positive divisor applied to the logits before sampling
            (default 1.0). Values below 1 sharpen the distribution, values
            above 1 flatten it.

    Returns:
        start_string followed by the generated characters.

    Raises:
        ValueError: if start_string is empty or temperature is not positive.
        KeyError: if start_string contains a character missing from char2idx.
    """
    # Validate up front: an empty seed or a non-positive temperature would
    # otherwise fail later with an obscure error or produce inf/nan logits.
    if not start_string:
        raise ValueError('start_string must contain at least one character')
    if temperature <= 0:
        raise ValueError('temperature must be positive')

    # Encode the seed and add a leading batch dimension of 1.
    input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)

    text_generated = []

    model.reset_states()
    for _ in range(num_generate):
        # Drop the batch dimension so each row holds one timestep's logits.
        predictions = tf.squeeze(model(input_eval), 0)
        predictions = predictions / temperature

        # Sample one id per timestep and keep the one for the last timestep.
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # Only the sampled character is fed back in; earlier context is
        # carried by the model's recurrent state.
        input_eval = tf.expand_dims([predicted_id], 0)
        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

In [39]:
# Generate and print text seeded with 'ROMEO: '. Sampling is random, so the
# output differs from run to run.
print(generate_text(model, start_string='ROMEO: '))

ROMEO: God grant me not; no: no, swear it.

Clown:
If it be so continue thou didst sent to king, as big as that,
And he shall spend mine honour'd friends,
I hate the golden storms to sea my servants.

VOLUMNIA:
A little cousin Francis be of woman,
His supposed up some gods for that him.

LORD WILLOUGHBY:
Breathe I am to get our hands and need thee with the stew here at Edward piece 't.

POLIXENES:
Most number in you.

First Officer:
Is Lord time have I; now fetch shall fall
On us along.

DUCHESS OF YORK:
My mother, Juliet?

LADY CAPULET:
What you must hear?

Lord:
'Tis sin ta'en her death hath left under our eyes, nor any other glive
To, but yield unto a word.

HENRY BOLINGBROKE:
Vouchsafe and leaxt with Rome's regard
From what he did being alt.

NORTHUMBERLAND:
The dog is deabh.

Second Senator:
Come on, bad, best for this so proposed withful shield;
Which 'twere to buy act out of half a hot-house!
O, now diest thou quickly.

DUKE VINCENTIO:
None, sir; I have particular such a tender;