In [1]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

Using TensorFlow backend.


In [2]:
filename = "Downloads/alice-in-wonderland.txt"
raw_text = open(filename,encoding='utf8').read()
raw_text = raw_text.lower()
raw_text = raw_text.replace('\n',' ')

In [3]:
len(raw_text)

144348

In [4]:
raw_text[:500]

'\ufeffchapter i. down the rabbit-hole  alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, ‘and what is the use of a book,’ thought alice ‘without pictures or conversations?’  so she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain '

In [5]:
chars = sorted(list(set(raw_text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))

In [6]:
char_to_int

{' ': 0,
 '!': 1,
 '(': 2,
 ')': 3,
 '*': 4,
 ',': 5,
 '-': 6,
 '.': 7,
 ':': 8,
 ';': 9,
 '?': 10,
 '[': 11,
 ']': 12,
 '_': 13,
 'a': 14,
 'b': 15,
 'c': 16,
 'd': 17,
 'e': 18,
 'f': 19,
 'g': 20,
 'h': 21,
 'i': 22,
 'j': 23,
 'k': 24,
 'l': 25,
 'm': 26,
 'n': 27,
 'o': 28,
 'p': 29,
 'q': 30,
 'r': 31,
 's': 32,
 't': 33,
 'u': 34,
 'v': 35,
 'w': 36,
 'x': 37,
 'y': 38,
 'z': 39,
 '‘': 40,
 '’': 41,
 '“': 42,
 '”': 43,
 '\ufeff': 44}

In [7]:
n_chars = len(raw_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  144348
Total Vocab:  45


In [8]:
seq_length = 100
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
    seq_in = raw_text[i:i + seq_length]
    seq_out = raw_text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  144248


In [9]:
X = np.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# one hot encode the output variable
y = np_utils.to_categorical(dataY)

In [10]:
X.shape

(144248, 100, 1)

In [11]:
y.shape

(144248, 44)

In [12]:
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [13]:
filepath="weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

In [14]:
model.fit(X, y, epochs=20, batch_size=128, callbacks=callbacks_list)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x264e72fec18>

In [15]:
raw_text[0:100]

'\ufeffchapter i. down the rabbit-hole  alice was beginning to get very tired of sitting by her sister on '

In [16]:
raw_text[104]

'b'

In [22]:
int_to_char = dict((i, c) for i, c in enumerate(chars))

In [26]:
import sys

In [27]:
# pick a random seed
start = np.random.randint(0, len(dataX)-1)
pattern = dataX[start]
print ("Seed:")
print ("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
for i in range(1000):
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    index = np.argmax(prediction)
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    sys.stdout.write(result)
    pattern.append(index)
    pattern = pattern[1:len(pattern)]
print ("\nDone.")

Seed:
" ally see shakespeare, in the pictures of him), while the rest waited in silence. at last the dodo sa "
ld, ‘it was a latter of the soeerers,’  ‘ie ion’t tie kine an thlleng’t tone,’ said alice, an at oncee at the could, and whs gerting to an  lh the dour oe the court,  nie had aeen ennnid on the sase an in white was toe tithle thet was a lirtle soinei  the was a lirtle soine to bn the woode  and the was a lirtle soine to bn the woode  she was a lirtle soine to be and then th the whyh the was a lirtle so tat anong the taade  the coum on the tase wire tiee   ‘he inr toe was at il ’hu it as i sas io a fott ’eth she boore!’  ‘ho whet would ’ou goon the dan’t ’hu would ’ou coold,’ said the caterpillar.  ‘you mave you moen,’ said the caterpillar.  ‘you mave you moen,’ said the caterpillar.  ‘you mave you moen,’ said the caterpillar.  ‘you mave you moen,’ said the caterpillar.  ‘you mave you moen,’ said the caterpillar.  ‘you mave you moen,’ said the caterpillar.  ‘you mave you moen,’