## Preparing Data

In [1]:
import keras
import numpy as np

# path = keras.utils.get_file(
#    'nietzsche.txt',
#    origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
# text = open(path).read().lower()
text = open("/home/trp22/CS/344/cs344/Project/OriginalTrilogy_script.txt").read().lower()
# text = open("/Users/tylerpoel/SW_script.txt").read().lower()
print('Corpus length:', len(text))

Using TensorFlow backend.


Corpus length: 494395


In [2]:
#print(text)

In [3]:

# Length of extracted character sequences
maxlen = 60

# We sample a new sequence every `step` characters
step = 3

# This holds our extracted sequences
sentences = []

# This holds the targets (the follow-up characters)
next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('Number of sequences:', len(sentences))

# List of unique characters in the corpus
chars = sorted(list(set(text)))
print('Unique characters:', len(chars))
# Dictionary mapping unique characters to their index in `chars`
char_indices = dict((char, chars.index(char)) for char in chars)

# Next, one-hot encode the characters into binary arrays.
print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Number of sequences: 164779
Unique characters: 60
Vectorization...


## Building the network

In [4]:
from keras import layers

model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

In [5]:
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

## Training the model

In [6]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [None]:
import random
import sys

for epoch in range(1, 61):
    print('epoch', epoch)
    # Fit the model for 1 epoch on the available training data
    model.fit(x, y,
              batch_size=128,
              epochs=1)

    if epoch % 5 == 0:
        # Select a text seed at random
        start_index = random.randint(0, len(text) - maxlen - 1)
        generated_text = text[start_index: start_index + maxlen]
        print('--- Generating with seed: "' + generated_text + '"')

        for temperature in [0.2, 0.5]:
            print('------ temperature:', temperature)
            sys.stdout.write(generated_text)

            # We generate 400 characters
            for i in range(400):
                sampled = np.zeros((1, maxlen, len(chars)))
                for t, char in enumerate(generated_text):
                    sampled[0, t, char_indices[char]] = 1.

                preds = model.predict(sampled, verbose=0)[0]
                next_index = sample(preds, temperature)
                next_char = chars[next_index]

                generated_text += next_char
                generated_text = generated_text[1:]

                sys.stdout.write(next_char)
                sys.stdout.flush()
            print()


epoch 1
Epoch 1/1
epoch 2
Epoch 1/1
--- Generating with seed: "he blast doors!
at the end of the hallway, blast doors begin"
------ temperature: 0.2
he blast doors!
at the end of the hallway, blast doors begin to the ship and the troops and and the ship stands and stands and stands around.

luke
the ship and the hutt and the ship and the ship and the ship through the fighters and and the rebel scouts from the force.  the falcon stands on the ship.

luke
the stormtroopers and back to the back and stands and stands at the force.  he stranged and the force is the stormtroopers and back to the controls and
------ temperature: 0.5
 the force is the stormtroopers and back to the controls and chances at the ship of the scout.  he is snow and passabler of the troops.  but the fighters not again the huge banks and bobas 
shamplishes to a massing energy ship and and stands for you. â¨
381 â int. dead east - once of the deaders - rebel star is uncer past on the floor.  the end of the fighters st

  This is separate from the ipykernel package so we can avoid doing imports until
