In [0]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
import tensorflow.keras.utils as ku 
import numpy as np

In [55]:
tokenizer = Tokenizer()
!wget --no-check-certificate \
    https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sonnets.txt \
    -O /tmp/sonnets.txt
data = open('/tmp/sonnets.txt').read()

corpus = data.lower().split("\n")


tokenizer.fit_on_texts(corpus)
total_words = len(tokenizer.word_index) + 1

input_sequences = []
for line in corpus:
	token_list = tokenizer.texts_to_sequences([line])[0]
	print(token_list)
	for i in range(1, len(token_list)):
		next_sequence = token_list[:i+1]
		input_sequences.append(next_sequence)


max_sequence_len = max([len(x) for x in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))

predictors, label = input_sequences[:,:-1],input_sequences[:,-1]

label = ku.to_categorical(label, num_classes=total_words)

--2020-03-14 17:40:31--  https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sonnets.txt
Resolving storage.googleapis.com (storage.googleapis.com)... 173.194.216.128, 2607:f8b0:400c:c13::80
Connecting to storage.googleapis.com (storage.googleapis.com)|173.194.216.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 93578 (91K) [text/plain]
Saving to: ‘/tmp/sonnets.txt’


2020-03-14 17:40:31 (164 MB/s) - ‘/tmp/sonnets.txt’ saved [93578/93578]

[34, 417, 877, 166, 213, 517]
[8, 878, 134, 351, 102, 156, 199]
[16, 22, 2, 879, 61, 30, 48, 634]
[25, 311, 635, 102, 200, 25, 278]
[16, 10, 880, 3, 62, 85, 214, 53]
[1372, 9, 1373, 636, 11, 122, 1374, 1375]
[201, 17, 1376, 64, 518, 202]
[118, 9, 1377, 3, 9, 47, 122, 135, 279]
[10, 8, 54, 63, 2, 418, 312, 419]
[1, 352, 1378, 3, 2, 1379, 420]
[215, 62, 85, 881, 1380, 9, 882]
[1, 311, 883, 884, 313, 7, 1381]
[257, 2, 94, 36, 353, 29, 1382, 21]
[3, 637, 2, 418, 354, 30, 2, 638, 1, 19]
[27, 1383, 885, 46, 1384, 9, 

In [56]:
# Next-word prediction network: embedding -> stacked (bi)LSTM -> dense softmax
# over the vocabulary.
model = Sequential()
# Inputs are the padded n-grams minus their final (target) token.
model.add(Embedding(total_words, 50, input_length=max_sequence_len-1))
# return_sequences=True so the following LSTM receives the full sequence.
model.add(Bidirectional(LSTM(150, return_sequences = True)))
model.add(Dropout(0.2))
model.add(LSTM(100))
model.add(Dropout(0.2))
# Floor division keeps the unit count an int; `/` would yield a float in Python 3.
model.add(Dense(total_words // 2, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(total_words, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_8 (Embedding)      (None, 10, 50)            160550    
_________________________________________________________________
bidirectional_5 (Bidirection (None, 10, 300)           241200    
_________________________________________________________________
dropout_10 (Dropout)         (None, 10, 300)           0         
_________________________________________________________________
lstm_14 (LSTM)               (None, 100)               160400    
_________________________________________________________________
dropout_11 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 1605)              162105    
_________________________________________________________________
dense_14 (Dense)             (None, 3211)             

In [0]:
 # Train for 100 epochs and keep the returned History object for later inspection.
 history = model.fit(
     predictors,
     label,
     epochs=100,
     verbose=1,
 )

Train on 15462 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100

In [0]:
import matplotlib.pyplot as plt


def plot_graphs(history, string):
  """Plot a training metric per epoch, plus its validation curve if recorded.

  Args:
    history: object whose ``history`` attribute maps metric names to
      per-epoch value sequences (e.g. the value returned by ``model.fit``).
    string: name of the metric to plot; the validation series is looked up
      under ``'val_' + string``.

  Raises:
    KeyError: if ``string`` itself is not a key of ``history.history``.
  """
  series = history.history
  val_key = 'val_' + string
  legend_labels = [string]

  plt.plot(series[string])
  # The validation series is absent when training ran without validation
  # data; skip it instead of failing with a KeyError.
  if val_key in series:
    plt.plot(series[val_key])
    legend_labels.append(val_key)

  plt.xlabel("Epochs")
  plt.ylabel(string)
  plt.legend(legend_labels)
  plt.show()
  
# The model is compiled with metrics=['accuracy']; use whichever key this
# Keras version recorded ("accuracy", or "acc" on older releases) so the
# lookup does not fail with a KeyError.
accuracy_key = "accuracy" if "accuracy" in history.history else "acc"
plot_graphs(history, accuracy_key)
plot_graphs(history, "loss")