# Text Generation With LSTM Recurrent Neural Networks with Keras

In [42]:
# Small LSTM Network
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

In [43]:
# Load the ASCII text and convert it to lowercase.
# A raw string keeps the Windows backslash literal ('\i' is not a valid escape
# sequence), and the `with` block closes the file handle once the text is read.
# NOTE(review): this is an absolute, machine-specific path; consider a
# configurable data directory.
filename = r'E:\input.txt'
with open(filename) as text_file:
    raw_text = text_file.read().lower()

In [44]:
# Build the lookup tables between characters and integer ids.
# Ids follow the sorted order of the distinct characters in the corpus.
chars = sorted(set(raw_text))
char_to_int = {ch: idx for idx, ch in enumerate(chars)}
int_to_char = {idx: ch for idx, ch in enumerate(chars)}

In [45]:
# Report the corpus length and the number of distinct characters.
n_chars, n_vocab = len(raw_text), len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  15640
Total Vocab:  42


In [46]:
# Slide a window of seq_length characters over the text, one character at a
# time. Each window, encoded as integer ids, is one input pattern; the
# character immediately after the window is its target.
seq_length = 150
window_starts = range(n_chars - seq_length)
dataX = [
    [char_to_int[ch] for ch in raw_text[begin:begin + seq_length]]
    for begin in window_starts
]
dataY = [char_to_int[raw_text[begin + seq_length]] for begin in window_starts]
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  15490


In [47]:
# Arrange the integer sequences as a 3-D array: [samples, time steps, features].
X = numpy.asarray(dataX).reshape((n_patterns, seq_length, 1))

In [48]:
# Normalize the integer ids by the vocabulary size.
# X is rebuilt from dataX here (instead of X = X / n_vocab) so that re-running
# this cell cannot divide an already-normalized X a second time.
X = numpy.reshape(dataX, (n_patterns, seq_length, 1)) / float(n_vocab)

In [49]:
# One-hot encode the target characters.
# num_classes is pinned to the vocabulary size; left unset, to_categorical
# infers the width from max(dataY) + 1, which comes out narrower than n_vocab
# if the highest-id character never occurs as a target.
y = np_utils.to_categorical(dataY, num_classes=n_vocab)

In [50]:
# Small LSTM network: one 256-unit LSTM layer, 20% dropout, and a softmax
# output layer with one unit per column of y.
n_timesteps, n_features = X.shape[1], X.shape[2]
model = Sequential([
    LSTM(256, input_shape=(n_timesteps, n_features)),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [51]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the current model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_20 (LSTM)               (None, 256)               264192    
_________________________________________________________________
dropout_20 (Dropout)         (None, 256)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 42)                10794     
Total params: 274,986
Trainable params: 274,986
Non-trainable params: 0
_________________________________________________________________


In [52]:
# Checkpoint callback: after an epoch, save the weights only when the training
# loss has improved. The epoch number and loss are embedded in the file name.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [53]:
# Train for 20 epochs in batches of 128; the checkpoint callback runs via callbacks_list.
model.fit(
    X,
    y,
    batch_size=128,
    epochs=20,
    callbacks=callbacks_list,
)

Epoch 1/20

Epoch 00001: loss improved from inf to 3.01706, saving model to weights-improvement-01-3.0171.hdf5
Epoch 2/20

Epoch 00002: loss improved from 3.01706 to 2.94237, saving model to weights-improvement-02-2.9424.hdf5
Epoch 3/20

Epoch 00003: loss improved from 2.94237 to 2.93132, saving model to weights-improvement-03-2.9313.hdf5
Epoch 4/20

Epoch 00004: loss improved from 2.93132 to 2.92020, saving model to weights-improvement-04-2.9202.hdf5
Epoch 5/20

Epoch 00005: loss improved from 2.92020 to 2.89337, saving model to weights-improvement-05-2.8934.hdf5
Epoch 6/20

Epoch 00006: loss improved from 2.89337 to 2.84449, saving model to weights-improvement-06-2.8445.hdf5
Epoch 7/20

Epoch 00007: loss improved from 2.84449 to 2.82520, saving model to weights-improvement-07-2.8252.hdf5
Epoch 8/20

Epoch 00008: loss improved from 2.82520 to 2.80146, saving model to weights-improvement-08-2.8015.hdf5
Epoch 9/20

Epoch 00009: loss improved from 2.80146 to 2.78929, saving model to weig

<keras.callbacks.History at 0x29f96d99a90>

In [54]:
# Load the network weights saved by the training run above.
# Checkpoint names embed the epoch and loss of one particular run, so a fixed
# name goes stale on every re-run. Because the checkpoint callback uses
# save_best_only=True, a file is written only when the loss improves, which
# makes the most recently written checkpoint the best one of the latest run.
import glob
import os

checkpoint_files = [
    path for path in glob.glob("weights-improvement-*.hdf5")
    if not path.endswith("-bigger.hdf5")  # those belong to the stacked two-LSTM model
]
if not checkpoint_files:
    raise FileNotFoundError("no weights-improvement-*.hdf5 checkpoint found; run the training cell first")
filename = max(checkpoint_files, key=os.path.getmtime)
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [55]:
# Pick a random training pattern to seed the generator.
# numpy.random.randint excludes its upper bound, so len(dataX) (not
# len(dataX) - 1) is needed for the last pattern to be selectable.
start = numpy.random.randint(0, len(dataX))
# Work on a copy so that later changes to `pattern` cannot alter the training
# data held in dataX.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

Seed:
" and tasty packed well and arrive in a timely manner
i bought these for my husband who is currently overseas he loves these and apparently his staff li "


In [56]:
import sys
# Generate 1000 characters, one at a time: predict the next character from the
# current window, print it, then slide the window forward by one position.
for i in range(1000):
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always take the most probable character.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Build a new list instead of appending in place; an in-place append would
    # also modify the dataX entry that `pattern` was taken from.
    pattern = pattern[1:] + [index]
print("\nDone.")

 the bod the toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe toe

In [53]:
# Larger network: two stacked 256-unit LSTM layers, each followed by 20%
# dropout. The first LSTM returns its full output sequence so the second LSTM
# can consume it.
n_timesteps, n_features = X.shape[1], X.shape[2]
model = Sequential([
    LSTM(256, input_shape=(n_timesteps, n_features), return_sequences=True),
    Dropout(0.2),
    LSTM(256),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [54]:
# Checkpoint callback for the larger network: save the weights only when the
# training loss improves. The "-bigger" suffix keeps these files apart from
# the small network's checkpoints.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [56]:
# Train for 30 epochs in batches of 64; the checkpoint callback runs via callbacks_list.
model.fit(
    X,
    y,
    batch_size=64,
    epochs=30,
    callbacks=callbacks_list,
)

Epoch 1/30

Epoch 00001: loss improved from 2.57449 to 2.49017, saving model to weights-improvement-01-2.4902-bigger.hdf5
Epoch 2/30

Epoch 00002: loss improved from 2.49017 to 2.41456, saving model to weights-improvement-02-2.4146-bigger.hdf5
Epoch 3/30

Epoch 00003: loss improved from 2.41456 to 2.34350, saving model to weights-improvement-03-2.3435-bigger.hdf5
Epoch 4/30

Epoch 00004: loss improved from 2.34350 to 2.27783, saving model to weights-improvement-04-2.2778-bigger.hdf5
Epoch 5/30

Epoch 00005: loss improved from 2.27783 to 2.21575, saving model to weights-improvement-05-2.2158-bigger.hdf5
Epoch 6/30

Epoch 00006: loss improved from 2.21575 to 2.16320, saving model to weights-improvement-06-2.1632-bigger.hdf5
Epoch 7/30

Epoch 00007: loss improved from 2.16320 to 2.11102, saving model to weights-improvement-07-2.1110-bigger.hdf5
Epoch 8/30

Epoch 00008: loss improved from 2.11102 to 2.06499, saving model to weights-improvement-08-2.0650-bigger.hdf5
Epoch 9/30

Epoch 00009:

<keras.callbacks.History at 0x2a7878d6550>

In [57]:
# Load the weights saved by the larger network's training run above.
# Checkpoint names embed the epoch and loss of one particular run, so a fixed
# name goes stale on every re-run. Because the checkpoint callback uses
# save_best_only=True, a file is written only when the loss improves, which
# makes the most recently written "-bigger" checkpoint the best one of the
# latest run.
import glob
import os

checkpoint_files = glob.glob("weights-improvement-*-bigger.hdf5")
if not checkpoint_files:
    raise FileNotFoundError("no weights-improvement-*-bigger.hdf5 checkpoint found; run the training cell first")
filename = max(checkpoint_files, key=os.path.getmtime)
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [62]:
# Pick a random training pattern to seed the generator.
# numpy.random.randint excludes its upper bound, so len(dataX) (not
# len(dataX) - 1) is needed for the last pattern to be selectable.
start = numpy.random.randint(0, len(dataX))
# Work on a copy so that later changes to `pattern` cannot alter the training
# data held in dataX.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

Seed:
"  related to formula changes in the past. unfortunately, i now need to find a new food that my cats will eat.
good flavor! these came securely packed.. "


In [63]:
import sys
# Generate 300 characters, one at a time: predict the next character from the
# current window, print it, then slide the window forward by one position.
for i in range(300):
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always take the most probable character.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Build a new list instead of appending in place; an in-place append would
    # also modify the dataX entry that `pattern` was taken from.
    pattern = pattern[1:] + [index]
print("\nDone.")

 i would nut can iete and brown sugar on the same for a long come dook aod i wes really love then all of the taste is ooe of the seeeneo and it sastes tian the steat fren saste great. the price ard seally gat a bate of the sarte in the same for a long come dook and i wes a sially orocuct for a long 
Done.


# Part B - Activation Function

In [61]:
# Part B: same single-LSTM network as the baseline, but the output layer uses
# ReLU instead of softmax.
# NOTE(review): ReLU outputs are not normalized into a probability
# distribution; that is the deliberate change under test here.
n_timesteps, n_features = X.shape[1], X.shape[2]
model = Sequential([
    LSTM(256, input_shape=(n_timesteps, n_features)),
    Dropout(0.2),
    Dense(y.shape[1], activation='relu'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [62]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the Part B model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_22 (LSTM)               (None, 256)               264192    
_________________________________________________________________
dropout_22 (Dropout)         (None, 256)               0         
_________________________________________________________________
dense_15 (Dense)             (None, 42)                10794     
Total params: 274,986
Trainable params: 274,986
Non-trainable params: 0
_________________________________________________________________


In [63]:
# Checkpoint callback: after an epoch, save the weights only when the training
# loss has improved. The epoch number and loss are embedded in the file name.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [None]:
# Train for 20 epochs in batches of 128; the checkpoint callback runs via callbacks_list.
model.fit(
    X,
    y,
    batch_size=128,
    epochs=20,
    callbacks=callbacks_list,
)

Epoch 1/20

Epoch 00001: loss improved from inf to 3.88196, saving model to weights-improvement-01-3.8820.hdf5
Epoch 2/20

Epoch 00002: loss improved from 3.88196 to 3.54549, saving model to weights-improvement-02-3.5455.hdf5
Epoch 3/20
 2432/15490 [===>..........................] - ETA: 4:12 - loss: 3.5466

In [None]:
# Load the weights saved by this experiment's training run.
# The previous hard-coded "-bigger" file holds weights of the stacked two-LSTM
# network, which do not fit this single-LSTM model. Because the checkpoint
# callback uses save_best_only=True, the most recently written checkpoint is
# the best one of the latest run (assuming the cells are run in order).
import glob
import os

checkpoint_files = [
    path for path in glob.glob("weights-improvement-*.hdf5")
    if not path.endswith("-bigger.hdf5")  # those belong to the stacked two-LSTM model
]
if not checkpoint_files:
    raise FileNotFoundError("no weights-improvement-*.hdf5 checkpoint found; run the training cell first")
filename = max(checkpoint_files, key=os.path.getmtime)
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [None]:
# Pick a random training pattern to seed the generator.
# numpy.random.randint excludes its upper bound, so len(dataX) (not
# len(dataX) - 1) is needed for the last pattern to be selectable.
start = numpy.random.randint(0, len(dataX))
# Work on a copy so that later changes to `pattern` cannot alter the training
# data held in dataX.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

In [None]:
import sys
# Generate 300 characters, one at a time: predict the next character from the
# current window, print it, then slide the window forward by one position.
for i in range(300):
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always take the highest-scoring character.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Build a new list instead of appending in place; an in-place append would
    # also modify the dataX entry that `pattern` was taken from.
    pattern = pattern[1:] + [index]
print("\nDone.")

# Part C - Cost Function

In [38]:
# define the LSTM model
# Part C: change the cost function from cross-entropy to hinge.
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
# The output layer needs one unit per class in y; a fixed width of 100 does
# not match the one-hot targets and makes fit() fail on a shape mismatch.
model.add(Dense(y.shape[1], activation='softmax'))
# NOTE(review): for one-hot multi-class targets Keras also offers
# 'categorical_hinge'; confirm which hinge variant the experiment intends.
model.compile(loss='hinge', optimizer='adam')

In [None]:
# Print the layer-by-layer summary of the Part C model.
model.summary()

In [None]:
# Checkpoint callback: after an epoch, save the weights only when the training
# loss has improved. The epoch number and loss are embedded in the file name.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [None]:
# Train for 20 epochs in batches of 128; the checkpoint callback runs via callbacks_list.
model.fit(
    X,
    y,
    batch_size=128,
    epochs=20,
    callbacks=callbacks_list,
)

In [None]:
# Load the weights saved by this experiment's training run.
# The previous hard-coded "-bigger" file holds weights of the stacked two-LSTM
# network, which do not fit this single-LSTM model. Because the checkpoint
# callback uses save_best_only=True, the most recently written checkpoint is
# the best one of the latest run (assuming the cells are run in order).
import glob
import os

checkpoint_files = [
    path for path in glob.glob("weights-improvement-*.hdf5")
    if not path.endswith("-bigger.hdf5")  # those belong to the stacked two-LSTM model
]
if not checkpoint_files:
    raise FileNotFoundError("no weights-improvement-*.hdf5 checkpoint found; run the training cell first")
filename = max(checkpoint_files, key=os.path.getmtime)
model.load_weights(filename)
model.compile(loss='hinge', optimizer='adam')

In [None]:
# Pick a random training pattern to seed the generator.
# numpy.random.randint excludes its upper bound, so len(dataX) (not
# len(dataX) - 1) is needed for the last pattern to be selectable.
start = numpy.random.randint(0, len(dataX))
# Work on a copy so that later changes to `pattern` cannot alter the training
# data held in dataX.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

In [None]:
import sys
# Generate 300 characters, one at a time: predict the next character from the
# current window, print it, then slide the window forward by one position.
for i in range(300):
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always take the most probable character.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Build a new list instead of appending in place; an in-place append would
    # also modify the dataX entry that `pattern` was taken from.
    pattern = pattern[1:] + [index]
print("\nDone.")

# Part D - Epoch

In [None]:
# define the LSTM model
# Part D: change the epochs and batch size from 20 & 128 to 30 & 64
# (the network itself is the baseline single-LSTM model).
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
# The output layer needs one unit per class in y; a fixed width of 100 does
# not match the one-hot targets and makes fit() fail on a shape mismatch.
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [None]:
# Print the layer-by-layer summary of the Part D model.
model.summary()

In [None]:
# Checkpoint callback: after an epoch, save the weights only when the training
# loss has improved. The epoch number and loss are embedded in the file name.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [None]:
# Train for 30 epochs in batches of 64; the checkpoint callback runs via callbacks_list.
model.fit(
    X,
    y,
    batch_size=64,
    epochs=30,
    callbacks=callbacks_list,
)

In [None]:
# Load the weights saved by this experiment's training run.
# The previous hard-coded "-bigger" file holds weights of the stacked two-LSTM
# network, which do not fit this single-LSTM model. Because the checkpoint
# callback uses save_best_only=True, the most recently written checkpoint is
# the best one of the latest run (assuming the cells are run in order).
import glob
import os

checkpoint_files = [
    path for path in glob.glob("weights-improvement-*.hdf5")
    if not path.endswith("-bigger.hdf5")  # those belong to the stacked two-LSTM model
]
if not checkpoint_files:
    raise FileNotFoundError("no weights-improvement-*.hdf5 checkpoint found; run the training cell first")
filename = max(checkpoint_files, key=os.path.getmtime)
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [None]:
# Pick a random training pattern to seed the generator.
# numpy.random.randint excludes its upper bound, so len(dataX) (not
# len(dataX) - 1) is needed for the last pattern to be selectable.
start = numpy.random.randint(0, len(dataX))
# Work on a copy so that later changes to `pattern` cannot alter the training
# data held in dataX.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

In [None]:
import sys
# Generate 300 characters, one at a time: predict the next character from the
# current window, print it, then slide the window forward by one position.
for i in range(300):
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always take the most probable character.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Build a new list instead of appending in place; an in-place append would
    # also modify the dataX entry that `pattern` was taken from.
    pattern = pattern[1:] + [index]
print("\nDone.")

# Part E - Gradient Estimation

In [None]:
# define the LSTM model
# Part E: change the optimizer (gradient estimation) from Adam to Adadelta.
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
# The output layer needs one unit per class in y; a fixed width of 100 does
# not match the one-hot targets and makes fit() fail on a shape mismatch.
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adadelta')

In [None]:
# Print the layer-by-layer summary of the Part E model.
model.summary()

In [None]:
# Checkpoint callback: after an epoch, save the weights only when the training
# loss has improved. The epoch number and loss are embedded in the file name.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [None]:
# Train for 20 epochs in batches of 128; the checkpoint callback runs via callbacks_list.
model.fit(
    X,
    y,
    batch_size=128,
    epochs=20,
    callbacks=callbacks_list,
)

In [None]:
# Load the weights saved by this experiment's training run.
# The previous hard-coded "-bigger" file holds weights of the stacked two-LSTM
# network, which do not fit this single-LSTM model. Because the checkpoint
# callback uses save_best_only=True, the most recently written checkpoint is
# the best one of the latest run (assuming the cells are run in order).
import glob
import os

checkpoint_files = [
    path for path in glob.glob("weights-improvement-*.hdf5")
    if not path.endswith("-bigger.hdf5")  # those belong to the stacked two-LSTM model
]
if not checkpoint_files:
    raise FileNotFoundError("no weights-improvement-*.hdf5 checkpoint found; run the training cell first")
filename = max(checkpoint_files, key=os.path.getmtime)
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adadelta')

In [None]:
# Pick a random training pattern to seed the generator.
# numpy.random.randint excludes its upper bound, so len(dataX) (not
# len(dataX) - 1) is needed for the last pattern to be selectable.
start = numpy.random.randint(0, len(dataX))
# Work on a copy so that later changes to `pattern` cannot alter the training
# data held in dataX.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

In [None]:
import sys
# Generate 300 characters, one at a time: predict the next character from the
# current window, print it, then slide the window forward by one position.
for i in range(300):
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always take the most probable character.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Build a new list instead of appending in place; an in-place append would
    # also modify the dataX entry that `pattern` was taken from.
    pattern = pattern[1:] + [index]
print("\nDone.")

# Part F - Network Architecture

In [None]:
# define the LSTM model
# Part F changes the network architecture: two stacked LSTM layers with 100
# memory units each (down from 256), dropout probability 0.15 (down from 0.20),
# and one extra hidden Dense layer of 100 units with ReLU activation.
model = Sequential()
model.add(LSTM(100, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.15))
model.add(LSTM(100))
model.add(Dropout(0.15))
model.add(Dense(100, activation='relu'))
# The output layer needs one unit per class in y; a fixed width of 100 does
# not match the one-hot targets and makes fit() fail on a shape mismatch.
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

# summary() prints the table itself and returns None, so it is called directly
# instead of being wrapped in print() (which adds a stray "None" line).
model.summary()

In [None]:
# Print the layer-by-layer summary of the Part F model.
model.summary()

In [None]:
# Checkpoint callback: after an epoch, save the weights only when the training
# loss has improved. The epoch number and loss are embedded in the file name.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [None]:
# Train for 20 epochs in batches of 128; the checkpoint callback runs via callbacks_list.
model.fit(
    X,
    y,
    batch_size=128,
    epochs=20,
    callbacks=callbacks_list,
)

In [None]:
# Load the weights saved by this experiment's training run.
# The previous hard-coded "-bigger" file holds weights of the 256-unit stacked
# network, which do not fit this 100-unit model. Because the checkpoint
# callback uses save_best_only=True, the most recently written checkpoint is
# the best one of the latest run (assuming the cells are run in order).
import glob
import os

checkpoint_files = [
    path for path in glob.glob("weights-improvement-*.hdf5")
    if not path.endswith("-bigger.hdf5")  # those belong to the 256-unit stacked model
]
if not checkpoint_files:
    raise FileNotFoundError("no weights-improvement-*.hdf5 checkpoint found; run the training cell first")
filename = max(checkpoint_files, key=os.path.getmtime)
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [None]:
# Pick a random training pattern to seed the generator.
# numpy.random.randint excludes its upper bound, so len(dataX) (not
# len(dataX) - 1) is needed for the last pattern to be selectable.
start = numpy.random.randint(0, len(dataX))
# Work on a copy so that later changes to `pattern` cannot alter the training
# data held in dataX.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

In [None]:
import sys
# Generate 300 characters, one at a time: predict the next character from the
# current window, print it, then slide the window forward by one position.
for i in range(300):
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always take the most probable character.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Build a new list instead of appending in place; an in-place append would
    # also modify the dataX entry that `pattern` was taken from.
    pattern = pattern[1:] + [index]
print("\nDone.")

# Part G - Network Initialization

In [None]:
# Initializers examined in Part G. The notebook's import cell only pulls names
# out of keras submodules, so the bare name `keras` is undefined here; import
# the initializers module explicitly.
from keras import initializers

# Bind the initializer objects to names instead of constructing and discarding
# them (the bare Initializer() base-class instance had no effect and is
# dropped). The model cell below requests these initializers through the
# string aliases 'random_uniform' and 'zeros' -- presumably with these same
# default bounds; confirm against the installed Keras version.
kernel_init = initializers.RandomUniform(minval=-0.05, maxval=0.05, seed=None)
bias_init = initializers.Zeros()

In [None]:
# Part G: baseline single-LSTM network with the output layer's initialization
# chosen explicitly -- random-uniform weights and zero biases.
n_timesteps, n_features = X.shape[1], X.shape[2]
model = Sequential([
    LSTM(256, input_shape=(n_timesteps, n_features)),
    Dropout(0.2),
    Dense(
        y.shape[1],
        activation='softmax',
        kernel_initializer='random_uniform',
        bias_initializer='zeros',
    ),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [None]:
# Print the layer-by-layer summary of the Part G model.
model.summary()

In [None]:
# Checkpoint callback: after an epoch, save the weights only when the training
# loss has improved. The epoch number and loss are embedded in the file name.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [None]:
# Train for 20 epochs in batches of 128; the checkpoint callback runs via callbacks_list.
model.fit(
    X,
    y,
    batch_size=128,
    epochs=20,
    callbacks=callbacks_list,
)

In [None]:
# Load the weights saved by this experiment's training run.
# The previous hard-coded "-bigger" file holds weights of the stacked two-LSTM
# network, which do not fit this single-LSTM model. Because the checkpoint
# callback uses save_best_only=True, the most recently written checkpoint is
# the best one of the latest run (assuming the cells are run in order).
import glob
import os

checkpoint_files = [
    path for path in glob.glob("weights-improvement-*.hdf5")
    if not path.endswith("-bigger.hdf5")  # those belong to the stacked two-LSTM model
]
if not checkpoint_files:
    raise FileNotFoundError("no weights-improvement-*.hdf5 checkpoint found; run the training cell first")
filename = max(checkpoint_files, key=os.path.getmtime)
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [None]:
# Pick a random training pattern to seed the generator.
# numpy.random.randint excludes its upper bound, so len(dataX) (not
# len(dataX) - 1) is needed for the last pattern to be selectable.
start = numpy.random.randint(0, len(dataX))
# Work on a copy so that later changes to `pattern` cannot alter the training
# data held in dataX.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

In [None]:
import sys
# Generate 300 characters, one at a time: predict the next character from the
# current window, print it, then slide the window forward by one position.
for i in range(300):
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always take the most probable character.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Build a new list instead of appending in place; an in-place append would
    # also modify the dataX entry that `pattern` was taken from.
    pattern = pattern[1:] + [index]
print("\nDone.")