In [3]:
import os
import time
from collections import namedtuple

import numpy as np
import tensorflow as tf

In [13]:
# Read the whole corpus into memory as one string.
with open('onegin.txt', 'r', encoding="utf8") as f:
    text = f.read()

# Character-level vocabulary: every distinct character in sorted order,
# with lookup tables in both directions.
vocab = sorted(set(text))
int_to_vocab = {idx: ch for idx, ch in enumerate(vocab)}
vocab_to_int = {ch: idx for idx, ch in int_to_vocab.items()}

# The corpus as an int32 array of vocabulary indices, one entry per character.
encoded = np.array(list(map(vocab_to_int.__getitem__, text)), dtype=np.int32)

In [14]:
# Number of distinct characters found in the corpus (size of the character vocabulary).
len(vocab)

148

In [15]:
def get_batches(arr, n_seqs, n_steps):
    '''Generate (input, target) batches of shape n_seqs x n_steps from arr.

       The array is trimmed to a whole number of batches and laid out as
       n_seqs parallel rows; successive batches are consecutive windows of
       n_steps columns over those rows. Every target is the element that
       follows the matching input element in arr, including at the right
       edge of a window (where the next element lives in the next window
       or the next row).

       Arguments
       ---------
       arr: 1-D array to draw batches from
       n_seqs: Batch size, the number of sequences per batch
       n_steps: Sequence length, the number of steps per sequence in a batch

       Yields
       ------
       x: (n_seqs, n_steps) inputs
       y: (n_seqs, n_steps) targets, i.e. the inputs shifted one step ahead
    '''
    arr = np.asarray(arr)

    # Number of elements per batch and number of whole batches available
    characters_per_batch = n_seqs * n_steps
    n_batches = len(arr) // characters_per_batch
    if n_batches == 0:
        # Not enough data for even a single batch: yield nothing
        return

    # Keep only the elements that form a whole number of batches
    n_used = n_batches * characters_per_batch
    inputs = arr[:n_used]

    # Build the targets on the flat array so that each one is the true
    # successor of its input, even across window and row boundaries.
    targets = np.zeros_like(inputs)
    targets[:-1] = inputs[1:]
    # The very last input is followed by the first trimmed-off element when
    # there is one; otherwise wrap around to the start of the array.
    targets[-1] = arr[n_used] if len(arr) > n_used else inputs[0]

    # Reshape 1D -> 2D with n_seqs rows
    inputs = inputs.reshape((n_seqs, -1))
    targets = targets.reshape((n_seqs, -1))

    for n in range(0, inputs.shape[1], n_steps):
        yield inputs[:, n:n+n_steps], targets[:, n:n+n_steps]

In [16]:
# Sanity check: take the first batch (10 sequences of 50 steps) and print the
# top-left 5x5 corner of the inputs and of the targets. In this corner each
# row of y is the matching row of x shifted one step ahead.
batches = get_batches(encoded, 10, 50)
x, y = next(batches)
print('x\n', x[:5, :5])
print('\ny\n', y[:5, :5])

x
 [[ 32  54   1 121 136]
 [123 113 123 118   8]
 [109 126 137  21   0]
 [122 114 127   6   1]
 [111 109 120   1 123]]

y
 [[ 54   1 121 136 126]
 [113 123 118   8   0]
 [126 137  21   0   0]
 [114 127   6   1 138]
 [109 120   1 123 122]]


In [17]:
def build_inputs(batch_size, num_steps):
    ''' Create the placeholders for inputs, targets and the dropout keep probability.

        Arguments
        ---------
        batch_size: Batch size, the number of sequences per batch
        num_steps: Sequence length, the number of steps per sequence in a batch

        Returns
        -------
        (inputs, targets, keep_prob) placeholders, in that order

    '''
    # Inputs and targets share one shape: a batch of integer-encoded sequences
    batch_shape = [batch_size, num_steps]
    inputs = tf.placeholder(tf.int32, batch_shape, name='inputs')
    targets = tf.placeholder(tf.int32, batch_shape, name='targets')

    # Dropout keep probability is fed at run time; no static shape is declared
    return inputs, targets, tf.placeholder(tf.float32, name='keep_prob')

In [18]:
def build_lstm(lstm_size, num_layers, batch_size, keep_prob):
    ''' Build a stacked LSTM cell together with its zero initial state.

        Arguments
        ---------
        lstm_size: Number of units in each LSTM layer
        num_layers: Number of LSTM layers to stack
        batch_size: Batch size
        keep_prob: Dropout keep probability (tf.placeholder) applied to each layer's output

        Returns
        -------
        (cell, initial_state)

    '''
    layers = []
    for _ in range(num_layers):
        # One layer: a basic LSTM cell with dropout on its outputs
        base_cell = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        layers.append(tf.contrib.rnn.DropoutWrapper(base_cell, output_keep_prob=keep_prob))

    # Stack the layers into a single multi-layer cell
    cell = tf.contrib.rnn.MultiRNNCell(layers)

    # All-zero float32 state to start each sequence from
    initial_state = cell.zero_state(batch_size, tf.float32)
    return cell, initial_state

In [19]:
def build_output(lstm_output, in_size, out_size):
    ''' Build the softmax layer on top of the LSTM output.

        Arguments
        ---------
        lstm_output: Output tensor coming from the LSTM layers
        in_size: Size of the incoming tensor (number of LSTM hidden units)
        out_size: Size of the softmax layer (vocabulary size)

        Returns
        -------
        (out, logits): the softmax predictions and the logits they were computed from

    '''
    # Collapse the leading dimensions so that every row holds in_size features
    flat_output = tf.reshape(tf.concat(lstm_output, axis=1), [-1, in_size])

    # Weights and bias connecting the LSTM output to the softmax layer
    with tf.variable_scope('softmax'):
        softmax_w = tf.Variable(tf.truncated_normal((in_size, out_size), stddev=0.1))
        softmax_b = tf.Variable(tf.zeros(out_size))

    # Unnormalised class scores, then their softmax as the prediction
    logits = tf.matmul(flat_output, softmax_w) + softmax_b
    return tf.nn.softmax(logits, name='predictions'), logits

In [20]:
def build_loss(logits, targets, lstm_size, num_classes):
    ''' Compute the mean softmax cross-entropy loss between logits and targets.

        Arguments
        ---------
        logits: Logits with one row per prediction and one column per class
        targets: Integer class indices the predictions are compared against
        lstm_size: Number of units in the LSTM layer (unused, kept for interface compatibility)
        num_classes: Number of target classes, i.e. vocabulary size (unused, kept for
                     interface compatibility; the class count is taken from logits)

        Returns
        -------
        Scalar tensor: the cross-entropy loss averaged over all predictions

    '''
    # Flatten the targets to one class index per logits row. The sparse op
    # takes the indices directly, so no dense one-hot tensor is needed, the
    # logits need no fully static shape, and the deprecated
    # tf.nn.softmax_cross_entropy_with_logits is avoided.
    labels = tf.reshape(targets, [-1])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    return tf.reduce_mean(loss)

In [21]:
def build_optimizer(loss, learning_rate, grad_clip):
    ''' Build the training op: Adam applied to gradients clipped by global norm.

        Arguments
        ---------
        loss: Loss tensor to minimise
        learning_rate: Learning rate passed to the Adam optimizer
        grad_clip: Global-norm threshold the gradients are clipped to

        Returns
        -------
        The op that applies the clipped gradients to the trainable variables

    '''
    # Gradients of the loss with respect to every trainable variable
    trainable = tf.trainable_variables()
    raw_grads = tf.gradients(loss, trainable)

    # Clip by global norm to keep "exploding" gradients under control
    clipped_grads, _ = tf.clip_by_global_norm(raw_grads, grad_clip)

    adam = tf.train.AdamOptimizer(learning_rate)
    return adam.apply_gradients(zip(clipped_grads, trainable))

In [22]:
class CharRNN:
    ''' Character-level LSTM network for training and for text sampling.

        Constructing an instance resets the default TensorFlow graph and builds
        the whole model in it. The resulting graph nodes are exposed as
        attributes: inputs, targets, keep_prob, initial_state, final_state,
        prediction, logits, loss and optimizer.
    '''

    def __init__(self, num_classes, batch_size=64, num_steps=50,
                 lstm_size=128, num_layers=2, learning_rate=0.001,
                 grad_clip=5, sampling=False):
        ''' Build the model graph.

            Arguments
            ---------
            num_classes: Number of classes (vocabulary size)
            batch_size: Number of sequences per batch
            num_steps: Number of steps per sequence in a batch
            lstm_size: Number of units in each LSTM layer
            num_layers: Number of stacked LSTM layers
            learning_rate: Learning rate for the optimizer
            grad_clip: Global-norm threshold for gradient clipping
            sampling: When true, build the network for text generation,
                      overriding batch_size and num_steps with 1
        '''
        # The same network is reused for sampling (text generation), where
        # characters are fed one at a time.
        if sampling:
            batch_size, num_steps = 1, 1

        tf.reset_default_graph()

        # Input placeholders
        self.inputs, self.targets, self.keep_prob = build_inputs(batch_size, num_steps)

        # Stacked LSTM cell and its zero state
        cell, self.initial_state = build_lstm(lstm_size, num_layers, batch_size, self.keep_prob)

        # One-hot encode the inputs and run them through the RNN layers
        x_one_hot = tf.one_hot(self.inputs, num_classes)
        outputs, state = tf.nn.dynamic_rnn(cell, x_one_hot, initial_state=self.initial_state)
        self.final_state = state

        # Softmax predictions and the logits behind them
        self.prediction, self.logits = build_output(outputs, lstm_size, num_classes)

        # Loss and training op (with gradient clipping)
        self.loss = build_loss(self.logits, self.targets, lstm_size, num_classes)
        self.optimizer = build_optimizer(self.loss, learning_rate, grad_clip)

In [23]:
batch_size = 100        # Batch size: number of sequences per batch
num_steps = 100         # Number of steps per sequence in a batch
lstm_size = 512         # Number of LSTM units in each hidden layer
num_layers = 2          # Number of LSTM layers
learning_rate = 0.001   # Learning rate
keep_prob = 0.5         # Dropout keep probability

In [None]:
epochs = 100
# Save a checkpoint every N training steps
save_every_n = 200

model = CharRNN(len(vocab), batch_size=batch_size, num_steps=num_steps,
                lstm_size=lstm_size, num_layers=num_layers, 
                learning_rate=learning_rate)

# Create the checkpoint directory up front: depending on the TensorFlow
# version, saver.save can fail when the parent directory is missing, and that
# would only surface at the first save, well into training.
os.makedirs('checkpoints', exist_ok=True)

saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    # Uncomment the line below to resume training from a checkpoint
    #saver.restore(sess, 'checkpoints/______.ckpt')
    counter = 0
    for e in range(epochs):
        # Every epoch starts from the zero LSTM state
        new_state = sess.run(model.initial_state)
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter += 1
            start = time.time()
            # The final state of the previous batch is fed back in as the
            # initial state of this one.
            feed = {model.inputs: x,
                    model.targets: y,
                    model.keep_prob: keep_prob,
                    model.initial_state: new_state}
            batch_loss, new_state, _ = sess.run([model.loss, 
                                                 model.final_state, 
                                                 model.optimizer], 
                                                 feed_dict=feed)
            
            end = time.time()
            print('Epoch: {}/{}... '.format(e+1, epochs),
                  'Training Step: {}... '.format(counter),
                  'Training loss: {:.4f}... '.format(batch_loss),
                  '{:.4f} sec/batch'.format((end-start)))
        
            if (counter % save_every_n == 0):
                saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))
    
    # Always save the final state of the model once training is over
    saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))

Epoch: 1/100...  Training Step: 1...  Training loss: 4.9985...  1.8924 sec/batch
Epoch: 1/100...  Training Step: 2...  Training loss: 4.9372...  1.7500 sec/batch
Epoch: 1/100...  Training Step: 3...  Training loss: 4.5914...  1.8050 sec/batch
Epoch: 1/100...  Training Step: 4...  Training loss: 5.1673...  1.7510 sec/batch
Epoch: 1/100...  Training Step: 5...  Training loss: 4.2192...  1.7686 sec/batch
Epoch: 1/100...  Training Step: 6...  Training loss: 4.1412...  2.4027 sec/batch
Epoch: 1/100...  Training Step: 7...  Training loss: 4.0313...  2.8782 sec/batch
Epoch: 1/100...  Training Step: 8...  Training loss: 3.9251...  2.6238 sec/batch
Epoch: 1/100...  Training Step: 9...  Training loss: 3.8325...  2.8272 sec/batch
Epoch: 1/100...  Training Step: 10...  Training loss: 3.7844...  2.8783 sec/batch
Epoch: 1/100...  Training Step: 11...  Training loss: 3.7554...  2.5643 sec/batch
Epoch: 1/100...  Training Step: 12...  Training loss: 3.7051...  2.4892 sec/batch
Epoch: 1/100...  Training

Epoch: 4/100...  Training Step: 62...  Training loss: 3.4218...  2.6197 sec/batch
Epoch: 4/100...  Training Step: 63...  Training loss: 3.3923...  2.5727 sec/batch
Epoch: 4/100...  Training Step: 64...  Training loss: 3.4138...  2.5766 sec/batch
Epoch: 4/100...  Training Step: 65...  Training loss: 3.4075...  2.5881 sec/batch
Epoch: 4/100...  Training Step: 66...  Training loss: 3.3761...  2.6335 sec/batch
Epoch: 4/100...  Training Step: 67...  Training loss: 3.3731...  2.7188 sec/batch
Epoch: 4/100...  Training Step: 68...  Training loss: 3.3540...  2.6876 sec/batch
Epoch: 5/100...  Training Step: 69...  Training loss: 3.4408...  2.6838 sec/batch
Epoch: 5/100...  Training Step: 70...  Training loss: 3.3573...  2.6406 sec/batch
Epoch: 5/100...  Training Step: 71...  Training loss: 3.3652...  2.6374 sec/batch
Epoch: 5/100...  Training Step: 72...  Training loss: 3.3862...  2.6958 sec/batch
Epoch: 5/100...  Training Step: 73...  Training loss: 3.3704...  2.7286 sec/batch
Epoch: 5/100... 

Epoch: 10/100...  Training Step: 162...  Training loss: 3.0487...  2.4200 sec/batch
Epoch: 10/100...  Training Step: 163...  Training loss: 2.9695...  2.4446 sec/batch
Epoch: 10/100...  Training Step: 164...  Training loss: 2.9061...  2.4360 sec/batch
Epoch: 10/100...  Training Step: 165...  Training loss: 2.9018...  2.4400 sec/batch
Epoch: 10/100...  Training Step: 166...  Training loss: 2.9434...  2.4300 sec/batch
Epoch: 10/100...  Training Step: 167...  Training loss: 2.9065...  2.4315 sec/batch
Epoch: 10/100...  Training Step: 168...  Training loss: 2.9034...  2.4290 sec/batch
Epoch: 10/100...  Training Step: 169...  Training loss: 2.8793...  2.4235 sec/batch
Epoch: 10/100...  Training Step: 170...  Training loss: 2.8920...  2.4166 sec/batch
Epoch: 11/100...  Training Step: 171...  Training loss: 2.9464...  2.4270 sec/batch
Epoch: 11/100...  Training Step: 172...  Training loss: 2.8618...  2.5327 sec/batch
Epoch: 11/100...  Training Step: 173...  Training loss: 2.8691...  2.4634 se

Epoch: 16/100...  Training Step: 260...  Training loss: 2.5341...  2.4405 sec/batch
Epoch: 16/100...  Training Step: 261...  Training loss: 2.5356...  2.4619 sec/batch
Epoch: 16/100...  Training Step: 262...  Training loss: 2.5119...  2.4530 sec/batch
Epoch: 16/100...  Training Step: 263...  Training loss: 2.5095...  2.4385 sec/batch
Epoch: 16/100...  Training Step: 264...  Training loss: 2.5014...  2.4510 sec/batch
Epoch: 16/100...  Training Step: 265...  Training loss: 2.4965...  2.4233 sec/batch
Epoch: 16/100...  Training Step: 266...  Training loss: 2.4917...  2.4373 sec/batch
Epoch: 16/100...  Training Step: 267...  Training loss: 2.4774...  2.4275 sec/batch
Epoch: 16/100...  Training Step: 268...  Training loss: 2.5076...  2.4334 sec/batch
Epoch: 16/100...  Training Step: 269...  Training loss: 2.4959...  2.4315 sec/batch
Epoch: 16/100...  Training Step: 270...  Training loss: 2.4880...  2.4340 sec/batch
Epoch: 16/100...  Training Step: 271...  Training loss: 2.4605...  2.4988 se

Epoch: 22/100...  Training Step: 358...  Training loss: 2.4236...  2.5422 sec/batch
Epoch: 22/100...  Training Step: 359...  Training loss: 2.3552...  2.5168 sec/batch
Epoch: 22/100...  Training Step: 360...  Training loss: 2.3838...  2.4789 sec/batch
Epoch: 22/100...  Training Step: 361...  Training loss: 2.3707...  2.4707 sec/batch
Epoch: 22/100...  Training Step: 362...  Training loss: 2.3924...  2.4707 sec/batch
Epoch: 22/100...  Training Step: 363...  Training loss: 2.3980...  2.4589 sec/batch
Epoch: 22/100...  Training Step: 364...  Training loss: 2.3915...  2.4649 sec/batch
Epoch: 22/100...  Training Step: 365...  Training loss: 2.3886...  2.4719 sec/batch
Epoch: 22/100...  Training Step: 366...  Training loss: 2.3929...  2.4734 sec/batch
Epoch: 22/100...  Training Step: 367...  Training loss: 2.3770...  2.4569 sec/batch
Epoch: 22/100...  Training Step: 368...  Training loss: 2.3628...  2.4694 sec/batch
Epoch: 22/100...  Training Step: 369...  Training loss: 2.3620...  2.5233 se

In [24]:
def pick_top_n(preds, vocab_size, top_n=5):
    ''' Sample a class index from the top_n most probable predictions.

        All but the top_n largest probabilities are zeroed, the remainder is
        renormalised to sum to one, and a single index is drawn from it with
        np.random.choice. The caller's array is left untouched.

        Arguments
        ---------
        preds: Array of class probabilities; singleton dimensions are squeezed away
        vocab_size: Number of classes to draw from (must match the number of probabilities)
        top_n: How many of the most probable classes stay eligible

        Returns
        -------
        The sampled class index
    '''
    # np.squeeze returns a view, so zeroing it in place would overwrite the
    # caller's predictions. Work on a float64 copy instead; the wider dtype
    # also keeps float32 rounding out of the renormalised probabilities.
    p = np.array(np.squeeze(preds), dtype=np.float64)
    p[np.argsort(p)[:-top_n]] = 0
    p = p / np.sum(p)
    c = np.random.choice(vocab_size, 1, p=p)[0]
    return c

In [25]:
def sample(checkpoint, n_samples, lstm_size, vocab_size, prime="Мой дядя самых честных правил,Когда не в шутку занемог,"):
    ''' Generate text with a trained network restored from a checkpoint.

        The network is first run over every character of prime to warm up its
        state, then one character is sampled from the prediction that follows
        prime, and n_samples further characters are sampled by feeding each
        sampled character back in.

        Arguments
        ---------
        checkpoint: Path of the checkpoint to restore the weights from
        n_samples: Number of feedback sampling steps to run after prime
        lstm_size: Number of units in each LSTM layer (must match the checkpoint)
        vocab_size: Number of classes (vocabulary size) of the network
        prime: Non-empty seed text; every character must be a key of vocab_to_int

        Returns
        -------
        prime followed by the generated characters, as a single string

        Raises
        ------
        ValueError: if prime is empty (there would be no prediction to sample from)
    '''
    if not prime:
        raise ValueError("prime must contain at least one character")

    samples = list(prime)
    model = CharRNN(vocab_size, lstm_size=lstm_size, sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)

        # Single-character input buffer, matching the int32 inputs placeholder
        x = np.zeros((1, 1), dtype=np.int32)

        def step(char_id, state):
            # Feed one character with dropout disabled; returns (preds, new_state)
            x[0, 0] = char_id
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: state}
            return sess.run([model.prediction, model.final_state],
                            feed_dict=feed)

        # Warm up the state on the seed text
        for c in prime:
            preds, new_state = step(vocab_to_int[c], new_state)

        # First generated character comes from the prediction after prime
        c = pick_top_n(preds, vocab_size)
        samples.append(int_to_vocab[c])

        # Keep feeding the last sampled character back in
        for _ in range(n_samples):
            preds, new_state = step(c, new_state)
            c = pick_top_n(preds, vocab_size)
            samples.append(int_to_vocab[c])

    return ''.join(samples)

In [26]:
# Show the checkpoint state recorded for the 'checkpoints' directory.
tf.train.get_checkpoint_state('checkpoints')


model_checkpoint_path: "checkpoints\\i400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints\\i400_l512.ckpt"

In [27]:
# Restore the checkpoint saved at training step 400 and generate text from the
# default seed, running 1000 sampling steps.
checkpoint = 'checkpoints/i400_l512.ckpt'
samp = sample(checkpoint, 1000, lstm_size, len(vocab))
print(samp)

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:

Future majo