## Preparing Data

In [6]:
import keras
import numpy as np

# path = keras.utils.get_file(
#    'nietzsche.txt',
#    origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
# text = open(path).read().lower()
text = open("/home/trp22/CS/344/cs344/Project/StarWars_script.txt").read().lower()
# text = open("/Users/tylerpoel/SW_script.txt").read().lower()
print('Corpus length:', len(text))

Corpus length: 196731


In [7]:
#print(text)

In [8]:

# Length of extracted character sequences
maxlen = 60

# We sample a new sequence every `step` characters
step = 3

# This holds our extracted sequences
sentences = []

# This holds the targets (the follow-up characters)
next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('Number of sequences:', len(sentences))

# List of unique characters in the corpus
chars = sorted(list(set(text)))
print('Unique characters:', len(chars))
# Dictionary mapping unique characters to their index in `chars`
char_indices = dict((char, chars.index(char)) for char in chars)

# Next, one-hot encode the characters into binary arrays.
print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Number of sequences: 65557
Unique characters: 56
Vectorization...


## Building the network

In [9]:
from keras import layers

model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

In [10]:
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

## Training the model

In [11]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [12]:
import random
import sys

for epoch in range(1, 3):
    print('epoch', epoch)
    # Fit the model for 1 epoch on the available training data
    model.fit(x, y,
              batch_size=128,
              epochs=1)

    # Select a text seed at random
    start_index = random.randint(0, len(text) - maxlen - 1)
    generated_text = text[start_index: start_index + maxlen]
    print('--- Generating with seed: "' + generated_text + '"')

    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------ temperature:', temperature)
        sys.stdout.write(generated_text)

        # We generate 400 characters
        for i in range(400):
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.

            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]

            generated_text += next_char
            generated_text = generated_text[1:]

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()


epoch 1
Epoch 1/1
--- Generating with seed: "ces, rushes up to the group and gives leia a big hug. every "
------ temperature: 0.2
ces, rushes up to the group and gives leia a big hug. every all ther side on ther wiggt ther side ther stard all giggs ther side of ther ther shoke all ther seader all sealling all band all bear liggs ther ther side all gater on ther bear all gher side all ther bears all bege to ther beader stard all hear ther seader all begggs all all leader all stard all stard as here all all giggs ther sight a looks ther seaker with stard all all bear all ben ther paine
------ temperature: 0.5
looks ther seaker with stard all all bear all ben ther paine in ther side therp or there all all fight eaples as there sire bearnoof gather likes beld mor ben ther way with star is around ther sedelly blating all singes leader his all mater gor bagg all thee sooken ally ther here bece be ther beray olf hard wher allool mecon ther plabes higg blats ther continged way ther hears lake