In [2]:
from tensorflow.keras.utils import get_file
import numpy as np

path = get_file(
    'nietzsche.txt',
    origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()
print('Corpus length:', len(text))

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
Corpus length: 600893


In [4]:
maxlen = 60                                                            
step = 3                                                               

sentences = []                                                         

next_chars = []                                                        

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])

print('Number of sequences:', len(sentences))

chars = sorted(list(set(text)))                                        
print('Unique characters:', len(chars))
char_indices = dict((char, chars.index(char)) for char in chars)       

print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)      
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)              
for i, sentence in enumerate(sentences):                               
    for t, char in enumerate(sentence):                                
        x[i, t, char_indices[char]] = 1                                
    y[i, char_indices[next_chars[i]]] = 1    

print('Done.')

Number of sequences: 200278
Unique characters: 57
Vectorization...
Done.


In [7]:
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential

model = Sequential([
  LSTM(128, input_shape=(maxlen, len(chars))),
  Dense(len(chars), activation='softmax')
])
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               95232     
_________________________________________________________________
dense_1 (Dense)              (None, 57)                7353      
Total params: 102,585
Trainable params: 102,585
Non-trainable params: 0
_________________________________________________________________


In [8]:
model.compile(
    loss='categorical_crossentropy',
    optimizer='RMSProp'
    )

In [9]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [None]:
import random
import sys

for epoch in range(1, 60):                                          
  print('epoch', epoch)
  model.fit(x, y, batch_size=128, epochs=1)                       
  start_index = random.randint(0, len(text) - maxlen - 1)         
  generated_text = text[start_index: start_index + maxlen]        
  print('--- Generating with seed: "' + generated_text + '"')     
  for temperature in [0.2, 0.5, 1.0, 1.2]:                        
    print('------ temperature:', temperature)
    sys.stdout.write(generated_text)

    for i in range(400):                                        
      sampled = np.zeros((1, maxlen, len(chars)))             
      for t, char in enumerate(generated_text):               
        sampled[0, t, char_indices[char]] = 1.              

      preds = model.predict(sampled, verbose=0)[0]            
      next_index = sample(preds, temperature)                 
      next_char = chars[next_index]                           

      generated_text += next_char
      generated_text = generated_text[1:]

      sys.stdout.write(next_char)

epoch 1
--- Generating with seed: " these tasks; a transvaluation of values, under the new
pres"
------ temperature: 0.2
 these tasks; a transvaluation of values, under the new
preselite the and and the hing and the the the sente the the aras in the sone the and the and in the and in the and the sere the the sond the serest and and the sond the seres and in the the and and the seres of the sof and and in the the his and the the the ard of the selle the the sond the the as the the sond the the the the the the the hall the the the here the the the the songe the the soull the s------ temperature: 0.5
l the the the here the the the the songe the the soull the sericice tise the mon the and and the dere and and the sinde the abecsint deaone thelitte the berest end inone onthingen the ares, and seutily the the grtien the to ather the soul, and aled to the it of the reas of the has and alise to the therast and the greas of the soratits withe sends ind the sone the sion of poraulity the theed b