In [21]:
import sys
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

In [2]:
# Load the ASCII text and convert it to lowercase.
filename = "wonderland.txt"
# Use a context manager so the file handle is closed as soon as the text is read
# instead of staying open for the lifetime of the kernel.
with open(filename, 'r', encoding='utf-8') as text_file:
    raw_text = text_file.read()
# Lowercasing merges upper- and lowercase letters into a single symbol each.
raw_text = raw_text.lower()

In [4]:
# Build the vocabulary: every distinct character of the text in sorted order,
# plus a lookup table from each character to its position in that ordering.
chars = sorted(set(raw_text))
char_to_int = {symbol: position for position, symbol in enumerate(chars)}

In [5]:
# Display the character -> integer lookup table (bare expression so the
# notebook renders it as the cell output).
char_to_int

{'\n': 0,
 ' ': 1,
 '!': 2,
 '"': 3,
 "'": 4,
 '(': 5,
 ')': 6,
 '*': 7,
 ',': 8,
 '-': 9,
 '.': 10,
 '0': 11,
 '3': 12,
 ':': 13,
 ';': 14,
 '?': 15,
 '[': 16,
 ']': 17,
 '_': 18,
 'a': 19,
 'b': 20,
 'c': 21,
 'd': 22,
 'e': 23,
 'f': 24,
 'g': 25,
 'h': 26,
 'i': 27,
 'j': 28,
 'k': 29,
 'l': 30,
 'm': 31,
 'n': 32,
 'o': 33,
 'p': 34,
 'q': 35,
 'r': 36,
 's': 37,
 't': 38,
 'u': 39,
 'v': 40,
 'w': 41,
 'x': 42,
 'y': 43,
 'z': 44}

In [7]:
# Summarize the corpus: its length in characters and the number of distinct
# characters (the vocabulary size).
n_chars, n_vocab = len(raw_text), len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  144408
Total Vocab:  45


In [10]:
# Prepare the dataset of input -> output pairs encoded as integers.
# Every input is a window of seq_length consecutive characters; the matching
# output is the single character that immediately follows that window.
seq_length = 100
# Encode the whole text once, then cut the windows out of the encoded list.
encoded_text = [char_to_int[symbol] for symbol in raw_text]
window_starts = range(n_chars - seq_length)
# Slicing yields a fresh list per window, so the patterns do not share storage.
dataX = [encoded_text[begin:begin + seq_length] for begin in window_starts]
dataY = [encoded_text[begin + seq_length] for begin in window_starts]
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  144308


In [11]:
# Arrange the inputs as [samples, time steps, features] and normalize the
# integer codes by dividing by the vocabulary size.
X = numpy.asarray(dataX).reshape((n_patterns, seq_length, 1)) / float(n_vocab)
# One-hot encode the output variable.
y = np_utils.to_categorical(dataY)

In [12]:
# Define the LSTM model: a single 256-unit LSTM layer, 20% dropout, and a
# softmax output layer with one unit per column of y.
model = Sequential([
    LSTM(256, input_shape=X.shape[1:]),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [13]:
# Define the checkpoint callback: it monitors the training loss and only saves
# when that loss improves (lower is better). The file name template embeds the
# epoch number and the loss value.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [14]:
# Train the network for 20 epochs in batches of 128, with the checkpoint
# callback attached.
model.fit(
    X,
    y,
    batch_size=128,
    epochs=20,
    callbacks=callbacks_list,
)

Epoch 1/20
Epoch 00001: loss improved from inf to 2.96521, saving model to weights-improvement-01-2.9652.hdf5
Epoch 2/20
Epoch 00002: loss improved from 2.96521 to 2.76839, saving model to weights-improvement-02-2.7684.hdf5
Epoch 3/20
Epoch 00003: loss improved from 2.76839 to 2.65972, saving model to weights-improvement-03-2.6597.hdf5
Epoch 4/20
Epoch 00004: loss improved from 2.65972 to 2.57710, saving model to weights-improvement-04-2.5771.hdf5
Epoch 5/20
Epoch 00005: loss improved from 2.57710 to 2.51242, saving model to weights-improvement-05-2.5124.hdf5
Epoch 6/20
Epoch 00006: loss improved from 2.51242 to 2.45410, saving model to weights-improvement-06-2.4541.hdf5
Epoch 7/20
Epoch 00007: loss improved from 2.45410 to 2.39865, saving model to weights-improvement-07-2.3987.hdf5
Epoch 8/20
Epoch 00008: loss improved from 2.39865 to 2.34975, saving model to weights-improvement-08-2.3498.hdf5
Epoch 9/20
Epoch 00009: loss improved from 2.34975 to 2.30268, saving model to weights-impro

<tensorflow.python.keras.callbacks.History at 0x7f82488dbb90>

In [17]:
# Restore the network weights from a saved checkpoint file and re-compile.
# NOTE: the file name embeds an epoch number and loss value, so it has to match
# a checkpoint file that actually exists on disk.
filename = "weights-improvement-20-2.0158.hdf5"
model.load_weights(filename)
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [18]:
# Reverse lookup table: integer code -> character, used to decode predictions.
int_to_char = {code: symbol for code, symbol in enumerate(chars)}

In [22]:
# Pick a random seed sequence from the training patterns.
# numpy.random.randint excludes its upper bound, so len(dataX) (not
# len(dataX)-1) is what makes every pattern, including the last, eligible.
start = numpy.random.randint(0, len(dataX))
# Work on a copy of the seed: the window is extended with predictions below,
# and using dataX[start] directly would append to the training data itself.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# Generate characters one at a time, feeding each prediction back as input.
for i in range(1000):
    # Shape and scale the window the same way as the training inputs.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always take the highest-scoring character.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window: drop the oldest character, add the predicted one.
    pattern = pattern[1:] + [index]
print("\nDone.")

Seed:
" ive it up,' alice replied: 'what's the answer?'

'i haven't the slightest idea,' said the hatter.

' "
ieme you mene the dormouse ' said the caterpillar.

'ie you d leter see toieteing ' said the manch hare.

'ieme you mene toe bene ' said the caterpillar.

'ie you d leter seen tou do a pirtle to tot,' said the caterpillar.

'ie you d leter see toieteing ' said the manch hare.

'ieme you mene toe bene ' said the caterpillar.

'ie you d leter seen tou do a pirtle to tot,' said the caterpillar.

'ie you d leter see toieteing ' said the manch hare.

'ieme you mene toe bene ' said the caterpillar.

'ie you d leter seen tou do a pirtle to tot,' said the caterpillar.

'ie you d leter see toieteing ' said the manch hare.

'ieme you mene toe bene ' said the caterpillar.

'ie you d leter seen tou do a pirtle to tot,' said the caterpillar.

'ie you d leter see toieteing ' said the manch hare.

'ieme you mene toe bene ' said the caterpillar.

'ie you d leter seen tou do a pirtle to tot,' 

###############
## Larger LSTM Recurrent Neural Network

In [23]:
# Larger network: two stacked 256-unit LSTM layers, each followed by 20%
# dropout, then a softmax output layer with one unit per column of y.
# The first LSTM returns its full output sequence so that the second LSTM
# receives one input per time step.
model = Sequential([
    LSTM(256, input_shape=X.shape[1:], return_sequences=True),
    Dropout(0.2),
    LSTM(256),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [24]:
# File name template with placeholders for the epoch number and the loss; the
# "-bigger" suffix sets these names apart from the earlier checkpoint template.
filepath="weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"

In [26]:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
# Load the ASCII text and convert it to lowercase.
filename = "wonderland.txt"
# Use a context manager so the file handle is closed as soon as the text is read.
with open(filename, 'r', encoding='utf-8') as text_file:
    raw_text = text_file.read()
raw_text = raw_text.lower()
# Create the mapping of unique characters to integers.
chars = sorted(set(raw_text))
char_to_int = {c: i for i, c in enumerate(chars)}
# Summarize the loaded data.
n_chars = len(raw_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)
# Prepare the dataset of input -> output pairs encoded as integers: each input
# is a window of seq_length characters, each output the character that follows.
seq_length = 100
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
    seq_in = raw_text[i:i + seq_length]
    seq_out = raw_text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)
# Reshape X to be [samples, time steps, features].
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# Normalize the integer codes by the vocabulary size.
X = X / float(n_vocab)
# One-hot encode the output variable.
y = np_utils.to_categorical(dataY)
# Define the LSTM model: two stacked 256-unit LSTM layers with 20% dropout
# after each, then a softmax output layer with one unit per column of y.
model = Sequential()
# return_sequences=True makes the first LSTM emit one output per time step,
# which is what the second LSTM layer consumes.
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(256))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')
# Define the checkpoint: monitor the training loss and save only on improvement.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]
# Fit the model.
model.fit(X, y, epochs=50, batch_size=64, callbacks=callbacks_list)

Total Characters:  144408
Total Vocab:  45
Total Patterns:  144308
Epoch 1/50
Epoch 00001: loss improved from inf to 2.78942, saving model to weights-improvement-01-2.7894-bigger.hdf5
Epoch 2/50
Epoch 00002: loss improved from 2.78942 to 2.41716, saving model to weights-improvement-02-2.4172-bigger.hdf5
Epoch 3/50
Epoch 00003: loss improved from 2.41716 to 2.21646, saving model to weights-improvement-03-2.2165-bigger.hdf5
Epoch 4/50
Epoch 00004: loss improved from 2.21646 to 2.08522, saving model to weights-improvement-04-2.0852-bigger.hdf5
Epoch 5/50
Epoch 00005: loss improved from 2.08522 to 1.98235, saving model to weights-improvement-05-1.9824-bigger.hdf5
Epoch 6/50
Epoch 00006: loss improved from 1.98235 to 1.90792, saving model to weights-improvement-06-1.9079-bigger.hdf5
Epoch 7/50
Epoch 00007: loss improved from 1.90792 to 1.84231, saving model to weights-improvement-07-1.8423-bigger.hdf5
Epoch 8/50
Epoch 00008: loss improved from 1.84231 to 1.78722, saving model to weights-imp

Epoch 32/50
Epoch 00032: loss improved from 1.29864 to 1.29148, saving model to weights-improvement-32-1.2915-bigger.hdf5
Epoch 33/50
Epoch 00033: loss improved from 1.29148 to 1.28140, saving model to weights-improvement-33-1.2814-bigger.hdf5
Epoch 34/50
Epoch 00034: loss improved from 1.28140 to 1.27649, saving model to weights-improvement-34-1.2765-bigger.hdf5
Epoch 35/50
Epoch 00035: loss improved from 1.27649 to 1.26477, saving model to weights-improvement-35-1.2648-bigger.hdf5
Epoch 36/50
Epoch 00036: loss improved from 1.26477 to 1.26118, saving model to weights-improvement-36-1.2612-bigger.hdf5
Epoch 37/50
Epoch 00037: loss improved from 1.26118 to 1.25411, saving model to weights-improvement-37-1.2541-bigger.hdf5
Epoch 38/50
Epoch 00038: loss improved from 1.25411 to 1.25200, saving model to weights-improvement-38-1.2520-bigger.hdf5
Epoch 39/50
Epoch 00039: loss improved from 1.25200 to 1.24294, saving model to weights-improvement-39-1.2429-bigger.hdf5
Epoch 40/50
Epoch 00040:

<tensorflow.python.keras.callbacks.History at 0x7f81c034fb90>

In [27]:
# Load the network weights from a saved checkpoint file and re-compile.
filename = "weights-improvement-50-1.2028-bigger.hdf5"
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')
# Pick a random seed sequence from the training patterns.
# numpy.random.randint excludes its upper bound, so len(dataX) (not
# len(dataX)-1) is what makes every pattern, including the last, eligible.
start = numpy.random.randint(0, len(dataX))
# Work on a copy of the seed: the window is extended with predictions below,
# and using dataX[start] directly would append to the training data itself.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# Generate characters one at a time, feeding each prediction back as input.
for i in range(1000):
    # Shape and scale the window the same way as the training inputs.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always take the highest-scoring character.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window: drop the oldest character, add the predicted one.
    pattern = pattern[1:] + [index]
print("\nDone.")

Seed:
" ly, and the queen was
silent.

the king laid his hand upon her arm, and timidly said 'consider, my
d "
ear, i wish you wouldn't be a lowse--and the sabbit had a little bat it it to sell you coul here?'

'i'd rather not,' said the caterpillar.

'well, i've sried to say " sie said to herself, 'they don't tee some minutes that makes them bone,'

'i don't know it was your taid,' said the caterpillar.

'well, it surne the sea, she much surtle sime the had not ro done the thing as the court, and the sable sealy ruite all the white rabbit say and san and steeenly as the court, and the three gardeners its louth and said, 'it she sand the thing the white rabbit sald to herself, and she thought to her hn the sime it would be a lowse that she was not a minute or two, and the three gardeners instantly was the fatthr, and then they was not a mittle beaodererl shat it was the first right into the sable for the wood, 'if you don't lnow the mittle door better now,'

'i'd nadr wo het to the sha

In [28]:
# Load the network weights from a saved checkpoint file and re-compile.
filename = "weights-improvement-50-1.2028-bigger.hdf5"
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')
# Pick a random seed sequence from the training patterns.
# numpy.random.randint excludes its upper bound, so len(dataX) (not
# len(dataX)-1) is what makes every pattern, including the last, eligible.
start = numpy.random.randint(0, len(dataX))
# Work on a copy of the seed: the window is extended with predictions below,
# and using dataX[start] directly would append to the training data itself.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# Generate characters one at a time, feeding each prediction back as input.
for i in range(1000):
    # Shape and scale the window the same way as the training inputs.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always take the highest-scoring character.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window: drop the oldest character, add the predicted one.
    pattern = pattern[1:] + [index]
print("\nDone.")

Seed:
" ne of the guinea-pigs cheered, and was immediately suppressed by
the officers of the court. (as that "
 i had as it mane you don't be a dauchtly wask to her to in that '

'i wish you wouldn't hise your hands,' said the king.

'it was the soiderens said with the bantte,' said the caterpillar.

'well, i've sried to say " sie said to herself, 'they don't tee some minutes that makes them bone,'

'i don't know it was your taid,' said the caterpillar.

'well, it surne the sea, she much surtle sime the had not ro done the thing as the court, and the sable sealy ruite all the white rabbit say and san and steeenly as the court, and the three gardeners its louth and said, 'it she sand the thing the white rabbit sald to herself, and she thought to her hn the sime it would be a lowse that she was not a minute or two, and the three gardeners instantly was the fatthr, and then they was not a mittle beaodererl shat it was the first right into the sable for the wood, 'if you don't lnow the mit