In [165]:
import pandas
import numpy
import random
import sys

from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM, GRU
from keras.optimizers import RMSprop, Adam



In [163]:
cd ..

/Users/mmccurdy/code/starwars-namer


In [164]:
from swnamer.process import chunk_names, create_indices

In [130]:
legend_names = pandas.read_csv('../data/legend_names.csv')

In [131]:
canon_names = pandas.read_csv('../data/cannon_names.csv')

In [132]:
cast_names = pandas.read_csv('../data/cast_names.csv')

In [133]:
clone_wars_names = pandas.read_csv('../data/clone_wars.csv')

In [134]:
kotor_names = pandas.read_csv('../data/kotor.csv')

In [135]:
combined = pandas.concat((legend_names, canon_names, cast_names, clone_wars_names, kotor_names))

In [136]:
combined = combined.reset_index(drop=True)

In [137]:
combined.shape

(1101, 1)

In [138]:
combined.drop_duplicates().shape

(763, 1)

In [139]:
combined = combined.drop_duplicates()

In [140]:
combined

Unnamed: 0,name
0,8t88
1,Abeloth
2,King Adas
3,Darth Andeddu
4,Nom Anor
5,Bail Antilles
6,Arcann
7,Seti Ashgad
8,Attichitcuk
9,Tavion Axmis


In [141]:
combined.loc[:, 'name'] = combined.name.str.lower()

In [142]:
combined.loc[:, 'length'] = combined.name.str.len()

In [143]:
combined.length.max()

27

In [144]:
combined[combined.length == 27]

Unnamed: 0,name,length
166,kybo ren (gir kybo ren-cha),27
314,lieutenant kaydel ko connix,27
482,admiral conan antonio motti,27
874,queen breha antilles organa,27


In [145]:
combined.to_csv('../output/starwars_processed.csv', index=False)

In [166]:
token_to_index, index_to_token = create_indices(combined, 'name')

In [167]:
chars = token_to_index.keys()

In [168]:
vocab_size = len(chars)
vocab_size

48

In [178]:
timesteps = 1

In [179]:
chunks, next_char = chunk_names(combined, 'name', timesteps)

In [180]:
X = numpy.zeros((len(chunks), timesteps, vocab_size))
y = numpy.zeros((len(chunks), vocab_size))

In [181]:
for i, chunk in enumerate(chunks):
    for t, token in enumerate(chunk):
        index = token_to_index[token]
        X[i, t, index] = 1
    index = token_to_index[next_char[i]]
    y[i, index] = 1

In [183]:
model = Sequential()
model.add(GRU(128, input_shape=(timesteps, vocab_size), return_sequences=True))
model.add(GRU(128))
model.add(Dense(vocab_size))
model.add(Activation('softmax'))
optimizer = RMSprop(lr=.1, clipvalue=12)
model.compile(optimizer, 'categorical_crossentropy')

In [184]:
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = numpy.asarray(preds).astype('float64')
    preds = numpy.log(preds) / temperature
    exp_preds = numpy.exp(preds)
    preds = exp_preds / numpy.sum(exp_preds)
    probas = numpy.random.multinomial(1, preds, 1)
    return numpy.argmax(probas)


def on_epoch_end(epoch, logs):
    if epoch % 10 != 0:
        return
    # Function invoked at end of each epoch. Prints generated text.
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    start_index = random.randint(0, len(chunks) - 1)
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- diversity:', diversity)

        generated = ''
        sentence = chunks[start_index]
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(50):
            x_pred = numpy.zeros((1, timesteps, vocab_size))
            for t, char in enumerate(sentence):
                x_pred[0, t, token_to_index[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = index_to_token[next_index]

            generated += next_char
            sentence = sentence[1:] + next_char
            
            if next_char == '\n':
                break

            sys.stdout.write(next_char)
            sys.stdout.flush()
    
        print()

print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

In [185]:
model.fit(X, y, epochs=200, batch_size=128, callbacks=[print_callback])

Epoch 1/200

----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "a"
ararrrrrrarrrrrarrrrrrrrrararararrrrarrrrrrrarrarar
----- diversity: 0.5
----- Generating with seed: "a"
as
----- diversity: 1.0
----- Generating with seed: "a"
arsssanelleradin s
----- diversity: 1.2
----- Generating with seed: "a"
ararernepeanga
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200

----- Generating text after Epoch: 10
----- diversity: 0.2
----- Generating with seed: "a"
aporino
----- diversity: 0.5
----- Generating with seed: "a"
ape o orapazespapo
----- diversity: 1.0
----- Generating with seed: "a"
aroomean
----- diversity: 1.2
----- Generating with seed: "a"
anane"to zus
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200

----- Generating text after Epoch: 20
----- diversity: 0.2
----- Generating with seed: "o"
omomo

  after removing the cwd from sys.path.


s
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200

----- Generating text after Epoch: 60
----- diversity: 0.2
----- Generating with seed: " "
 de de
----- diversity: 0.5
----- Generating with seed: " "
 dee
----- diversity: 1.0
----- Generating with seed: " "
 deen
----- diversity: 1.2
----- Generating with seed: " "
 t dex-goske
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200

----- Generating text after Epoch: 70
----- diversity: 0.2
----- Generating with seed: "o"
o
----- diversity: 0.5
----- Generating with seed: "o"
o
----- diversity: 1.0
----- Generating with seed: "o"
o twaln
----- diversity: 1.2
----- Generating with seed: "o"
onoan
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200

----- Generating text after Epoch: 80
----- diversity: 0

wer ro bo baelffffr jereeraa
----- diversity: 1.0
----- Generating with seed: "w"
we blron baloole dammorraeriaamioornorin da jooun t
----- diversity: 1.2
----- Generating with seed: "w"
we
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200

----- Generating text after Epoch: 130
----- diversity: 0.2
----- Generating with seed: "m"
mereeee teee tten teereessteel ten teeren teee ttte
----- diversity: 0.5
----- Generating with seed: "m"
mten tttee toooo ttese reratosan tereen tten o tare
----- diversity: 1.0
----- Generating with seed: "m"
ma thlmrttanthn toly
----- diversity: 1.2
----- Generating with seed: "m"
m
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200

----- Generating text after Epoch: 140
----- diversity: 0.2
----- Generating with seed: "r"
r thas
----- diversity: 0.5
----- Generating with seed: "r

ariliririreris
----- diversity: 0.5
----- Generating with seed: "a"
aner
----- diversity: 1.0
----- Generating with seed: "a"
air
----- diversity: 1.2
----- Generating with seed: "a"
ami beywacgoiodedd
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


<keras.callbacks.History at 0x12b891b38>