# Shakespeare Sonnets with LSTMs

## Training

In [22]:
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Activation
from keras.callbacks import ModelCheckpoint, TensorBoard

# Read the whole corpus into memory as a single string.
# The encoding is spelled out so the text decodes the same way on every
# platform instead of following the locale default. "utf-8" (rather than
# "utf-8-sig") keeps a leading byte-order mark, if the file has one, as an
# ordinary character, so the character vocabulary built from the corpus is the
# same as it was under a UTF-8 locale default.
with open("sonnets.txt", encoding="utf-8") as corpus_file:
    corpus = corpus_file.read()
print("Loaded a corpus of {0} characters".format(len(corpus)))

# Show the first 200 characters as a quick visual check of the input.
print('\nData sample:\n')
print(corpus[:200])

Loaded a corpus of 94652 characters

Data sample:

﻿From fairest creatures we desire increase,
That thereby beauty's rose might never die,
But as the riper should by time decease,
His tender heir might bear his memory:
But thou, contracted to thine ow


In [23]:
# Build the character vocabulary: every distinct character in the corpus, in
# sorted order, gets an integer id equal to its position in that ordering.
# `decoding` maps id -> char and `encoding` is its inverse (char -> id).
chars = sorted(set(corpus))
num_chars = len(chars)
decoding = dict(enumerate(chars))
encoding = {char: index for index, char in decoding.items()}
print("Our corpus contains {0} unique characters.".format(num_chars))

Our corpus contains 62 unique characters.


In [24]:
# Slide a window of sentence_length characters over the corpus, advancing by
# `skip` each time. Each window becomes one training input (as a list of
# character ids) and the character immediately after it becomes its label.
# This yields roughly len(corpus) / skip overlapping 'sentences'.
sentence_length = 50
skip = 1
window_starts = range(0, len(corpus) - sentence_length, skip)
X_data = [
    [encoding[char] for char in corpus[start:start + sentence_length]]
    for start in window_starts
]
y_data = [encoding[corpus[start + sentence_length]] for start in window_starts]

num_sentences = len(X_data)
print("Sliced our corpus into {0} sentences of length {1}".format(num_sentences, sentence_length))

Sliced our corpus into 94602 sentences of length 50


In [25]:
# Vectorize our data and labels. We want everything in one-hot:
#   X[i, t, c] is True when position t of sentence i holds the character with id c
#   y[i, c]    is True when the character following sentence i has id c
print("Vectorizing X and y...")
# Use the builtin `bool` as the dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
X = np.zeros((num_sentences, sentence_length, num_chars), dtype=bool)
y = np.zeros((num_sentences, num_chars), dtype=bool)
for i, sentence in enumerate(X_data):
    for t, encoded_char in enumerate(sentence):
        X[i, t, encoded_char] = True
    y[i, y_data[i]] = True

# Double check our vectorized data before we sink hours into fitting a model
print("Sanity check y. Dimension: {0} # Sentences: {1} Characters in corpus: {2}".format(y.shape, num_sentences, len(chars)))
print("Sanity check X. Dimension: {0} Sentence length: {1}".format(X.shape, sentence_length))

Vectorizing X and y...
Sanity check y. Dimension: (94602, 62) # Sentences: 94602 Characters in corpus: 62
Sanity check X. Dimension: (94602, 50, 62) Sentence length: 50


In [26]:
# Network: a 256-unit LSTM reads a (sentence_length, num_chars) one-hot window,
# a dense layer maps its output to one score per character, and a softmax turns
# those scores into a probability distribution over the next character.
model = Sequential([
    LSTM(256, input_shape=(sentence_length, num_chars)),
    Dense(num_chars),
    Activation('softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 256)               326656    
_________________________________________________________________
dense_3 (Dense)              (None, 62)                15934     
_________________________________________________________________
activation_3 (Activation)    (None, 62)                0         
Total params: 342,590
Trainable params: 342,590
Non-trainable params: 0
_________________________________________________________________


In [27]:
# Serialize the model architecture (not the weights) to YAML so it can be
# rebuilt later without re-running the model definition.
architecture = model.to_yaml()
# Open in write mode so the file always holds exactly one description: with
# append mode, re-running this cell would concatenate a second copy onto the
# existing file contents.
with open('model.yaml', 'w') as model_file:
    model_file.write(architecture)

In [28]:
# Checkpoint file name template; the epoch number and training loss are
# substituted into it each time a checkpoint is written.
file_path = "weights-{epoch:02d}-{loss:.3f}.hdf5"

# Watch the training loss and write a checkpoint only when it reaches a new minimum.
checkpoint = ModelCheckpoint(
    file_path,
    monitor="loss",
    mode="min",
    save_best_only=True,
    verbose=1,
)
# Also log training progress for TensorBoard.
tensorboard = TensorBoard(log_dir='./logs/min_train_loss')
callbacks = [checkpoint, tensorboard]

In [30]:
# Train on the one-hot data with the checkpoint and TensorBoard callbacks
# attached. The call is the cell's last expression, so the returned History
# object is shown as the cell output.
model.fit(
    X,
    y,
    batch_size=128,
    epochs=30,
    callbacks=callbacks,
)

Epoch 1/30

Epoch 00001: loss improved from inf to 2.22969, saving model to weights-01-2.230.hdf5
Epoch 2/30

Epoch 00002: loss improved from 2.22969 to 2.00871, saving model to weights-02-2.009.hdf5
Epoch 3/30

Epoch 00003: loss improved from 2.00871 to 1.89584, saving model to weights-03-1.896.hdf5
Epoch 4/30

Epoch 00004: loss improved from 1.89584 to 1.80656, saving model to weights-04-1.807.hdf5
Epoch 5/30

Epoch 00005: loss improved from 1.80656 to 1.73977, saving model to weights-05-1.740.hdf5
Epoch 6/30

Epoch 00006: loss improved from 1.73977 to 1.68214, saving model to weights-06-1.682.hdf5
Epoch 7/30

Epoch 00007: loss improved from 1.68214 to 1.62910, saving model to weights-07-1.629.hdf5
Epoch 8/30

Epoch 00008: loss improved from 1.62910 to 1.57913, saving model to weights-08-1.579.hdf5
Epoch 9/30

Epoch 00009: loss improved from 1.57913 to 1.53409, saving model to weights-09-1.534.hdf5
Epoch 10/30

Epoch 00010: loss improved from 1.53409 to 1.48826, saving model to weigh

<keras.callbacks.History at 0x7f98040b7f28>

## Generating

In [41]:
from keras.models import model_from_yaml

# Rebuild the network from the YAML architecture file, restore trained weights
# from a checkpoint, and compile it with the same loss/optimizer used in training.
# NOTE(review): the checkpoint name embeds the epoch and loss of one particular
# training run, so it has to be updated after retraining.
weights_path = "weights-30-0.713.hdf5"

with open("model.yaml") as model_file:
    architecture = model_file.read()

model = model_from_yaml(architecture)
model.load_weights(weights_path)
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [56]:
from random import randint
corpus_length = len(corpus)
# Fixed start offset so the demo below is reproducible. For a random seed
# phrase, use instead:
#   seed = randint(0, corpus_length - sentence_length)
seed = 1000
seed_phrase = corpus[seed:seed + sentence_length]
print('Start the generating with the seed phrase:\n')
print(seed_phrase)

# One-hot encode the seed phrase as a batch containing a single sentence.
# Use the builtin `bool` as the dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
X = np.zeros((1, sentence_length, num_chars), dtype=bool)
for i, character in enumerate(seed_phrase):
    X[0, i, encoding[character]] = True

Start the generating with the seed phrase:

 thy beauty's use,
If thou couldst answer 'This fa


In [57]:
from tqdm import tqdm_notebook as tqdm

# Greedy generation: at every step take the character with the highest
# predicted activation, append it to the output, and slide the one-hot input
# window forward by one position so the prediction becomes part of the input.
PREVIEW_STEPS = 5    # steps printed in detail to illustrate the process
SILENT_STEPS = 450   # further steps generated behind a progress bar


def generate_next_char(model, window):
    """Predict the next character id and advance the input window by one step.

    Args:
        model: object whose ``predict(window, verbose=0)`` returns one
            activation per character id for the single sentence in ``window``.
        window: one-hot array of shape (1, sentence_length, num_chars).

    Returns:
        A ``(char_id, next_window)`` tuple: the index of the largest activation,
        and a new array of the same shape as ``window`` with the oldest
        character dropped and the predicted one appended.
    """
    char_id = np.argmax(model.predict(window, verbose=0))
    # Builtin `bool` dtype: the `np.bool` alias was removed in NumPy 1.24.
    one_hot = np.zeros((1, 1, window.shape[2]), dtype=bool)
    one_hot[0, 0, char_id] = True
    return char_id, np.concatenate((window[:, 1:, :], one_hot), axis=1)


# small loop illustrating the generation process
generated_text = ""
for _ in range(PREVIEW_STEPS):
    print('\n Input characters:\n')
    # Decode the current window back to text by taking the hot index at each position.
    print("".join(decoding[char_id] for char_id in np.argmax(X[0], axis=1)))

    prediction, X = generate_next_char(model, X)

    print(f'\n Predicted character: {decoding[prediction]}')
    generated_text += decoding[prediction]

# Generate the rest without per-step printing; iterating the tqdm wrapper
# advances and closes the progress bar automatically.
for _ in tqdm(range(SILENT_STEPS)):
    prediction, X = generate_next_char(model, X)
    generated_text += decoding[prediction]

print('\n Final generated text:\n')
print(generated_text)


 Input characters:

 thy beauty's use,
If thou couldst answer 'This fa

 Predicted character: i

 Input characters:

thy beauty's use,
If thou couldst answer 'This fai

 Predicted character: r

 Input characters:

hy beauty's use,
If thou couldst answer 'This fair

 Predicted character:  

 Input characters:

y beauty's use,
If thou couldst answer 'This fair 

 Predicted character: h

 Input characters:

 beauty's use,
If thou couldst answer 'This fair h

 Predicted character: o


HBox(children=(IntProgress(value=0, max=450), HTML(value='')))



 Final generated text:

ir holds her prime,
Which hath heavy that make me with sweet soull
Of that which thou dost best earth than truth still,
And summer's lovion of your every pleasure,
When I more the summer's dost in his grown,
And thou art all myself doth stard and loven,
As shadow in your sweet state words he call
  And thou to have I hos excuse the state,
And thou art butted that the learnot comment
Dear heaven for myself with thee shall fave,
That do to me to the wor


**Exercise:** In the above example, we've generated text by making predictions and taking the character with the *maximum activation* at each point. In practice, more creative and interesting results can often be found by introducing some *randomness* into the generation. 

Instead of the `argmax` rule for selecting the predicted character at each point, experiment with introducing randomness into the generation process (for instance, choosing the predicted character with probability proportional to its softmax activation).