In [1]:
import tensorflow as tf

import numpy as np
import os
import time
import numpy as np
import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Input, Embedding, Conv1D, GlobalMaxPool1D, SimpleRNN, LSTM, GRU, Masking
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.callbacks import TensorBoard 
from keras.metrics import categorical_crossentropy
from keras.callbacks import EarlyStopping 

In [2]:
# path_to_file = 'Rf Zakony. Ugolovnyy kodeks RF - BooksCafe.Net.txt'
# text = open(path_to_file, 'rb').read().decode(encoding='Windows-1251')

path_to_file = 'evgenyi_onegin.txt'
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')

In [3]:
# length of text is the number of characters in it
print('Length of text: {} characters'.format(len(text)))

Length of text: 286984 characters


In [4]:
print(text[:500])

Александр Сергеевич Пушкин

                                Евгений Онегин
                                Роман в стихах

                        Не мысля гордый свет забавить,
                        Вниманье дружбы возлюбя,
                        Хотел бы я тебе представить
                        Залог достойнее тебя,
                        Достойнее души прекрасной,
                        Святой исполненной мечты,
                        Поэзии живой и ясной,
                        Высо


In [5]:
text = text + text

In [6]:
# The unique characters in the file
vocab = sorted(set(text))
print('{} unique characters'.format(len(vocab)))

131 unique characters


In [7]:
# Creating a mapping from unique characters to indices
char2idx = {u:i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)

text_as_int = np.array([char2idx[c] for c in text])

In [8]:
text_as_int, len(text_as_int), len(text)

(array([ 71, 110, 104, ..., 104, 121,   0]), 573968, 573968)

### train and target

In [9]:
# The maximum length sentence you want for a single input in characters
seq_length = 100
examples_per_epoch = len(text)//(seq_length+1)

# Create training examples / targets
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

for i in char_dataset.take(5):
    print(idx2char[i.numpy()])

А
л
е
к
с


In [10]:
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

for item in sequences.take(5):
    print(repr(''.join(idx2char[item.numpy()])))

'Александр Сергеевич Пушкин\n\n                                Евгений Онегин\n                          '
'      Роман в стихах\n\n                        Не мысля гордый свет забавить,\n                        '
'Вниманье дружбы возлюбя,\n                        Хотел бы я тебе представить\n                        '
'Залог достойнее тебя,\n                        Достойнее души прекрасной,\n                        Свят'
'ой исполненной мечты,\n                        Поэзии живой и ясной,\n                        Высоких д'


In [11]:
def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

dataset = sequences.map(split_input_target)

Print the first example input and target values:

In [12]:
for input_example, target_example in  dataset.take(1):
    print('Input data: ', repr(''.join(idx2char[input_example.numpy()])))
    print('Target data:', repr(''.join(idx2char[target_example.numpy()])))

Input data:  'Александр Сергеевич Пушкин\n\n                                Евгений Онегин\n                         '
Target data: 'лександр Сергеевич Пушкин\n\n                                Евгений Онегин\n                          '


In [13]:
# Batch size
BATCH_SIZE = 64
EPOCHS = 500

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

dataset

<BatchDataset shapes: ((64, 100), (64, 100)), types: (tf.int32, tf.int32)>

In [14]:
# Length of the vocabulary in chars
vocab_size = len(vocab)

# The embedding dimension
embedding_dim = 50

# Number of RNN units
rnn_units = 1000

In [15]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    model = tf.keras.Sequential(
        [
            tf.keras.layers.Embedding(vocab_size, embedding_dim),
            
            tf.keras.layers.LSTM(rnn_units, return_sequences=True),
            tf.keras.layers.Dense(vocab_size)
        ]
    )
    return model
    
model = build_model(
    vocab_size=len(vocab),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE)

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 50)          6550      
_________________________________________________________________
lstm (LSTM)                  (None, None, 1000)        4204000   
_________________________________________________________________
dense (Dense)                (None, None, 131)         131131    
Total params: 4,341,681
Trainable params: 4,341,681
Non-trainable params: 0
_________________________________________________________________


In [16]:
def loss(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

In [17]:
model.compile(
    optimizer='adam', loss=loss)

early_stopping=EarlyStopping(
    monitor='loss',
    restore_best_weights=True, 
    patience=5, 
    mode='min',
    verbose=2
)

history = model.fit(dataset,
                    epochs=EPOCHS,
                    verbose=2,
                    callbacks=[early_stopping])

Epoch 1/500
88/88 - 11s - loss: 2.3901
Epoch 2/500
88/88 - 9s - loss: 1.7920
Epoch 3/500
88/88 - 9s - loss: 1.5951
Epoch 4/500
88/88 - 8s - loss: 1.4809
Epoch 5/500
88/88 - 8s - loss: 1.3894
Epoch 6/500
88/88 - 8s - loss: 1.3328
Epoch 7/500
88/88 - 9s - loss: 1.2953
Epoch 8/500
88/88 - 8s - loss: 1.2744
Epoch 9/500
88/88 - 8s - loss: 1.2408
Epoch 10/500
88/88 - 8s - loss: 1.2060
Epoch 11/500
88/88 - 8s - loss: 1.1826
Epoch 12/500
88/88 - 8s - loss: 1.1547
Epoch 13/500
88/88 - 8s - loss: 1.1448
Epoch 14/500
88/88 - 8s - loss: 1.1187
Epoch 15/500
88/88 - 8s - loss: 1.0872
Epoch 16/500
88/88 - 8s - loss: 1.0628
Epoch 17/500
88/88 - 8s - loss: 1.0705
Epoch 18/500
88/88 - 8s - loss: 1.0318
Epoch 19/500
88/88 - 8s - loss: 1.0006
Epoch 20/500
88/88 - 8s - loss: 0.9958
Epoch 21/500
88/88 - 8s - loss: 0.9699
Epoch 22/500
88/88 - 8s - loss: 0.9380
Epoch 23/500
88/88 - 8s - loss: 0.9214
Epoch 24/500
88/88 - 8s - loss: 0.8965
Epoch 25/500
88/88 - 8s - loss: 0.8683
Epoch 26/500
88/88 - 8s - loss: 0

In [18]:
def generate_text(model, start_string, temperature):
    # Evaluation step (generating text using the learned model)

    # Number of characters to generate
    num_generate = 500

    # Converting our start string to numbers (vectorizing)
    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    # Empty string to store our results
    text_generated = []

    # Low temperature results in more predictable text.
    # Higher temperature results in more surprising text.
    # Experiment to find the best setting.
    temperature = temperature

    # Here batch size == 1
    model.reset_states()
    for i in range(num_generate):
        predictions = model(input_eval)
        predictions = tf.squeeze(predictions, 0)
        # using a categorical distribution to predict the character returned by the model
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # Pass the predicted character as the next input to the model
        # along with the previous hidden state
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])

    return (start_string + ''.join(text_generated))

In [19]:
def diff_tempriture(model, temperatures=[0.5]):
    for temperature in temperatures:
        print(f'Temperature = {temperature}')
        text_ = generate_text(model, start_string=u"И вот идет уже корабль", temperature=temperature)
        print(text_)
        print('---------------------------------------------------------------------\n\n\n')

In [20]:
temperatures = [0.01, 0.1, 0.5, 0.9, 1.5, 2, 30]
diff_tempriture(model, temperatures)

Temperature = 0.01
И вот идет уже корабль
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
---------------------------------------------------------------------



Temperature = 0.1
И вот идет уже корабль
                                                                                                                                                                                                                                                                                                                                                        

In [21]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(
            vocab_size, 
            embedding_dim,
            batch_input_shape=[batch_size, None]),
                                 
        tf.keras.layers.LSTM(rnn_units,
                            return_sequences=True,
                            stateful=True,
                            recurrent_initializer='glorot_uniform'),

        tf.keras.layers.LSTM(rnn_units,
                            return_sequences=True,
                            stateful=True,
                            recurrent_initializer='glorot_uniform'),

         tf.keras.layers.LSTM(rnn_units,
                            return_sequences=True,
                            stateful=True,
                            recurrent_initializer='glorot_uniform'),
        
        tf.keras.layers.LSTM(rnn_units,
                            return_sequences=True,
                            stateful=True,
                            recurrent_initializer='glorot_uniform'),
                                   
        tf.keras.layers.Dense(vocab_size)
    ])
    return model

In [22]:
model1 = build_model(
    vocab_size=len(vocab),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE)
    
model1.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (64, None, 50)            6550      
_________________________________________________________________
lstm_1 (LSTM)                (64, None, 1000)          4204000   
_________________________________________________________________
lstm_2 (LSTM)                (64, None, 1000)          8004000   
_________________________________________________________________
lstm_3 (LSTM)                (64, None, 1000)          8004000   
_________________________________________________________________
lstm_4 (LSTM)                (64, None, 1000)          8004000   
_________________________________________________________________
dense_1 (Dense)              (64, None, 131)           131131    
Total params: 28,353,681
Trainable params: 28,353,681
Non-trainable params: 0
__________________________________________

In [23]:
model1.compile(
    optimizer='adam', loss=loss)

# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_freq=5,
    save_weights_only=True)

history = model1.fit(dataset,
                    epochs=EPOCHS,
                    verbose=2,
                    callbacks=[checkpoint_callback, early_stopping])


Epoch 1/500
88/88 - 74s - loss: 2.2006
Epoch 2/500
88/88 - 70s - loss: 1.6170
Epoch 3/500
88/88 - 69s - loss: 1.4036
Epoch 4/500
88/88 - 69s - loss: 1.3306
Epoch 5/500
88/88 - 69s - loss: 1.3408
Epoch 6/500
88/88 - 67s - loss: 1.2906
Epoch 7/500
88/88 - 69s - loss: 1.2445
Epoch 8/500
88/88 - 68s - loss: 1.2210
Epoch 9/500
88/88 - 69s - loss: 1.1909
Epoch 10/500
88/88 - 69s - loss: 1.1452
Epoch 11/500
88/88 - 68s - loss: 1.1170
Epoch 12/500
88/88 - 69s - loss: 1.0835
Epoch 13/500
88/88 - 68s - loss: 1.0729
Epoch 14/500
88/88 - 69s - loss: 1.0489
Epoch 15/500
88/88 - 70s - loss: 0.9916
Epoch 16/500
88/88 - 68s - loss: 0.9611
Epoch 17/500
88/88 - 69s - loss: 0.9128
Epoch 18/500
88/88 - 68s - loss: 0.8583
Epoch 19/500
88/88 - 69s - loss: 0.8054
Epoch 20/500
88/88 - 69s - loss: 0.7532
Epoch 21/500
88/88 - 69s - loss: 0.7020
Epoch 22/500
88/88 - 70s - loss: 0.6418
Epoch 23/500
88/88 - 68s - loss: 0.5856
Epoch 24/500
88/88 - 70s - loss: 0.5372
Epoch 25/500
88/88 - 69s - loss: 0.4941
Epoch 26/

In [24]:
model1 = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)
model1.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
model1.build(tf.TensorShape([1, None]))

In [25]:
diff_tempriture(model1, temperatures)

Temperature = 0.01
И вот идет уже корабль такитник простой.

                                     XLI

                        Под ним (как начинает капать
                        Весенний дождь на злак полей)
                        Пастух, плетя свой пестрый лапоть,
                        Поет про волжских рыбарей;
                        И горожанка молодая,
                        В деревне лето провождая,
                        Когда стремглав верхом она
                        Несется по полям одна,
                        Жоря
---------------------------------------------------------------------



Temperature = 0.1
И вот идет уже корабль тихонько отпирая,
                        Уж ей Филипьевна седая
                        Приносит на подносе чай.
                        "Пора, дитя мое, вставай:
                        Да ты, красавица, готова!
                        О пташка ранняя моя!
                        Вечор уж как боялась я!
                        Да, слава бог

In [26]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim),
                                 
        tf.keras.layers.GRU(rnn_units,
                            return_sequences=True,
                            stateful=False,
                            recurrent_initializer='glorot_uniform'),

        tf.keras.layers.GRU(rnn_units,
                            return_sequences=True,
                            stateful=False,
                            recurrent_initializer='glorot_uniform'),

         tf.keras.layers.GRU(rnn_units,
                            return_sequences=True,
                            stateful=False,
                            recurrent_initializer='glorot_uniform'),
                                   
        tf.keras.layers.Dense(vocab_size)
    ])
    return model

In [27]:
model2 = build_model(
    vocab_size=len(vocab),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE)


model2.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, None, 50)          6550      
_________________________________________________________________
gru (GRU)                    (None, None, 1000)        3156000   
_________________________________________________________________
gru_1 (GRU)                  (None, None, 1000)        6006000   
_________________________________________________________________
gru_2 (GRU)                  (None, None, 1000)        6006000   
_________________________________________________________________
dense_3 (Dense)              (None, None, 131)         131131    
Total params: 15,305,681
Trainable params: 15,305,681
Non-trainable params: 0
_________________________________________________________________


In [28]:
model2.compile(
    optimizer='adam', loss=loss)
    
history = model2.fit(dataset,
                    epochs=EPOCHS,
                    verbose=2,
                    callbacks=[early_stopping])


Epoch 1/500
88/88 - 26s - loss: 2.1769
Epoch 2/500
88/88 - 23s - loss: 1.5394
Epoch 3/500
88/88 - 23s - loss: 1.3811
Epoch 4/500
88/88 - 23s - loss: 1.2728
Epoch 5/500
88/88 - 23s - loss: 1.2046
Epoch 6/500
88/88 - 23s - loss: 1.1362
Epoch 7/500
88/88 - 23s - loss: 1.0619
Epoch 8/500
88/88 - 23s - loss: 0.9746
Epoch 9/500
88/88 - 23s - loss: 0.8994
Epoch 10/500
88/88 - 23s - loss: 0.8213
Epoch 11/500
88/88 - 23s - loss: 0.9209
Epoch 12/500
88/88 - 23s - loss: 0.7964
Epoch 13/500
88/88 - 23s - loss: 0.6615
Epoch 14/500
88/88 - 23s - loss: 0.5701
Epoch 15/500
88/88 - 23s - loss: 0.4918
Epoch 16/500
88/88 - 23s - loss: 0.4200
Epoch 17/500
88/88 - 23s - loss: 0.3455
Epoch 18/500
88/88 - 23s - loss: 0.2866
Epoch 19/500
88/88 - 23s - loss: 0.2977
Epoch 20/500
88/88 - 23s - loss: 0.2665
Epoch 21/500
88/88 - 23s - loss: 0.2087
Epoch 22/500
88/88 - 23s - loss: 0.1858
Epoch 23/500
88/88 - 23s - loss: 0.1684
Epoch 24/500
88/88 - 23s - loss: 0.1552
Epoch 25/500
88/88 - 23s - loss: 0.1436
Epoch 26/

In [29]:
diff_tempriture(model2, temperatures)

Temperature = 0.01
И вот идет уже корабль
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
---------------------------------------------------------------------



Temperature = 0.1
И вот идет уже корабль
                                                                                                                                                                                                                                                                                                                                                        

Подозрительно разные результаты, 2 сетки вообще почти не работают, а одна прямо ка кпушкин,что-то тут не так. Третья сетка вроде не сильно уступает по мощьности второй но показывает, в виде результата чушь  на любой температуре. Не понимаю почему так происходит. Так же не понимаю почему несколкьо настаканых уровней лучше чем один. По сути же они уже ухудшают кажество информации. 