<a href="https://colab.research.google.com/github/xarvel/DataScience/blob/master/RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [125]:
import tensorflow as tf

import numpy as np
import os
import time
from tqdm import tqdm

print(tf.__version__)

2.14.0


In [126]:
# Fetch the training corpus; `get_file` hands back the local path of the
# downloaded file, which is what the reading cell opens.
path_to_file = tf.keras.utils.get_file(
    fname='bible.txt',
    origin='https://drive.google.com/uc?export=download&id=1-4rl2K0zr5zj3Lj8tzVOEMF152gtApVP',
)

In [127]:
#@title utils

def split_input_target(sequence):
    """Return ``(input, target)`` views of ``sequence`` offset by one step.

    The input is everything except the last element and the target is
    everything except the first, so ``target[i]`` is the element that
    follows ``input[i]`` in the original sequence.
    """
    return sequence[:-1], sequence[1:]

def text_stats(text, sample_size=250, start=150):
  """Print a short report about ``text`` and return its sorted vocabulary.

  The report contains an excerpt of the text, the total number of characters
  and the number of distinct characters.

  Args:
    text: The corpus as a single string.
    sample_size: Number of characters to show in the excerpt.
    start: Offset of the first excerpt character. The default of 150 means
      the excerpt does not begin at the very start of the text.

  Returns:
    A sorted list of the unique characters found in ``text``.
  """
  print(f'Sample {sample_size} characters:')
  print('-' * 80)
  # Show `sample_size` characters beginning at offset `start`; slicing
  # simply yields fewer (or no) characters when the text is shorter.
  print(text[start:start + sample_size])
  print('-' * 80)
  # The length of the text is the number of characters in it.
  print(f'Length of text: {len(text)} characters')
  # The unique characters in the text, in sorted order.
  vocab = sorted(set(text))
  print(f'{len(vocab)} unique characters')

  return vocab

In [128]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8 (binary mode
# keeps the file's newline bytes untouched). The context manager closes the
# file handle as soon as the read is finished.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
vocab = text_stats(text)

# Forward lookup: character -> integer id. No mask token is reserved.
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab),
    mask_token=None
)
# Inverse lookup: integer id -> character. It reuses the forward layer's
# vocabulary so that ids map back to the same characters.
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(),
    invert=True,
    mask_token=None
)

Sample 250 characters:
--------------------------------------------------------------------------------
о и землю.

2 Земля же была безвидна и пуста, и тьма над бездною, и Дух Божий носился над водою.

3 И сказал Бог: да будет свет. И стал свет.

4 И увидел Бог свет, что он хорош, и отделил Бог свет от тьмы.

5 И назвал Бог свет днем, а тьму ночью. И б
--------------------------------------------------------------------------------
Length of text: 4196242 characters
92 unique characters


In [129]:
#@title CONFIG

# Number of characters in each training input; chunks of SEQUENCE_LENGTH + 1
# ids are cut from the corpus and split into input/target pairs.
SEQUENCE_LENGTH = 100

# Number of sequences per training batch.
BATCH_SIZE = 64

# Buffer size used to shuffle the dataset.
# (tf.data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements.)
BUFFER_SIZE = 10000

# The embedding dimension.
EMBEDDING_DIMENSION = 256
# Backward-compatible alias for the original, misspelled constant name;
# existing references to EMBEDDING_DIMENTION keep working.
EMBEDDING_DIMENTION = EMBEDDING_DIMENSION

# Number of RNN units.
RNN_UNITS = 1024

# Number of passes over STEPS_PER_EPOCH batches.
EPOCHS = 30

# Size of the vocabulary held by the StringLookup layer.
VOCAB_SIZE = len(ids_from_chars.get_vocabulary())

# Directory where the checkpoints will be saved.
CHECKPOINT_DIR = './training_checkpoints'
# Checkpoint path template; "{epoch}" is filled in with str.format when
# the weights are saved.
CHECKPOINT_PREFIX = os.path.join(CHECKPOINT_DIR, "ckpt_{epoch}")

# Seed passed to tf.random.set_seed.
SEED = 1

# Save a checkpoint every CHECKPOINT_INTERVAL epochs.
CHECKPOINT_INTERVAL = 5

optimizer = tf.keras.optimizers.Adam()
# The model outputs raw logits (its final Dense layer has no activation),
# hence from_logits=True.
loss_func = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [130]:
# Set TensorFlow's global random seed from the SEED constant in CONFIG.
tf.random.set_seed(SEED)

In [131]:
def text_from_ids(ids):
  """Convert ids to characters and join them along the last axis."""
  chars = chars_from_ids(ids)
  return tf.strings.reduce_join(chars, axis=-1)

In [132]:
# Split the corpus into individual UTF-8 characters and map each to its id.
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
# One dataset element per character id.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)
# Group ids into chunks of SEQUENCE_LENGTH + 1: split_input_target later
# drops one element from each end, leaving input and target of
# SEQUENCE_LENGTH each. A short trailing chunk is discarded.
sequences = ids_dataset.batch(SEQUENCE_LENGTH + 1, drop_remainder=True)

In [133]:
#@title DATASET

# Turn every (SEQUENCE_LENGTH + 1)-long chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)
# Shuffle through a bounded buffer, then form fixed-size batches; an
# incomplete final batch is dropped.
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)
# Read the batch count before repeat(): the repeated dataset is infinite
# and has no length.
STEPS_PER_EPOCH = len(dataset)
# Repeat indefinitely (the training loop pulls STEPS_PER_EPOCH batches per
# epoch from a single iterator) and prefetch once, at the end of the pipeline.
dataset = dataset.repeat().prefetch(tf.data.AUTOTUNE)

print(STEPS_PER_EPOCH)

649


In [134]:
class RNNModel(tf.keras.Model):
  """Character-level language model: Embedding -> GRU -> Dense.

  The final Dense layer has no activation, so the model returns one vector
  of `vocab_size` unnormalized scores (logits) per input position.
  """

  def __init__(self, vocab_size, embedding_dim, rnn_units):
    """Create the layers.

    Args:
      vocab_size: Number of distinct token ids; used as the embedding input
        size and as the width of the output layer.
      embedding_dim: Size of each embedding vector.
      rnn_units: Number of units in the GRU layer.
    """
    # No positional arguments: the base constructor does not take the
    # instance itself as an argument.
    super().__init__()

    self.embedding = tf.keras.layers.Embedding(
        vocab_size,
        embedding_dim
    )
    # return_sequences gives an output for every time step; return_state
    # additionally returns the final state so generation can be resumed.
    self.gru = tf.keras.layers.GRU(
        rnn_units,
        return_sequences=True,
        return_state=True
    )
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    """Run the model on a batch of id sequences.

    Args:
      inputs: Token ids (assumed to be shaped (batch, time) -- TODO confirm
        against callers).
      states: Initial GRU state. When None, the GRU's own initial state for
        these inputs is used.
      return_state: When True, also return the final GRU state.
      training: Forwarded to every layer.

    Returns:
      The logits, or a `(logits, states)` tuple when `return_state` is True.
    """
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x


class OneStep(tf.keras.Model):
  """Wraps a trained model to sample the next character for a batch of strings."""

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    """Store the model and lookup layers and precompute the "[UNK]" mask.

    Args:
      model: Callable as `model(inputs=..., states=..., return_state=True)`,
        returning `(logits, states)`.
      chars_from_ids: Layer mapping ids back to characters.
      ids_from_chars: Layer mapping characters to ids.
      temperature: Divisor applied to the logits before sampling.
    """
    super().__init__()

    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Ids that must never be sampled, as a column of sparse indices.
    unk_ids = self.ids_from_chars(['[UNK]'])[:, None]
    unk_mask = tf.SparseTensor(
        indices=unk_ids,
        # One -inf per forbidden id.
        values=[-float('inf') for _ in range(len(unk_ids))],
        # A single axis as long as the vocabulary.
        dense_shape=[len(ids_from_chars.get_vocabulary())]
    )
    # Dense vector: -inf at the forbidden ids, 0 everywhere else.
    self.prediction_mask = tf.sparse.to_dense(unk_mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    """Sample one next character for each input string.

    Args:
      inputs: Batch of strings to condition on.
      states: Model state from the previous step, or None to start fresh.

    Returns:
      A `(predicted_chars, states)` tuple: the sampled characters and the
      model state to pass to the next call.
    """
    # Strings -> characters -> ids, padded into a dense tensor.
    chars = tf.strings.unicode_split(inputs, 'UTF-8')
    ids = self.ids_from_chars(chars).to_tensor()

    # logits is indexed as [batch, char, next_char_logits].
    logits, states = self.model(
        inputs=ids,
        states=states,
        return_state=True
    )

    # Keep only the last position, scale by temperature, then add the mask
    # so "[UNK]" receives a logit of -inf.
    last_logits = logits[:, -1, :] / self.temperature
    masked_logits = last_logits + self.prediction_mask

    # Draw one id per batch row from the masked logits.
    sampled = tf.random.categorical(masked_logits, num_samples=1)
    next_ids = tf.squeeze(sampled, axis=-1)

    # Map the sampled ids back to characters.
    return self.chars_from_ids(next_ids), states

In [135]:
# Instantiate the network from the CONFIG hyperparameters.
model = RNNModel(VOCAB_SIZE, EMBEDDING_DIMENTION, RNN_UNITS)

# Sampling wrapper around the same model instance, sharing the lookup layers.
one_step_model = OneStep(
    model=model,
    chars_from_ids=chars_from_ids,
    ids_from_chars=ids_from_chars,
)

In [136]:
# Build with the shape the model is fed during training: batches of
# BATCH_SIZE id sequences, each SEQUENCE_LENGTH long. The vocabulary size is
# not an input dimension -- the inputs are ids, not one-hot vectors.
model.build((BATCH_SIZE, SEQUENCE_LENGTH))
model.summary()

Model: "rnn_model_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_6 (Embedding)     multiple                  23808     
                                                                 
 gru_6 (GRU)                 multiple                  3938304   
                                                                 
 dense_6 (Dense)             multiple                  95325     
                                                                 
Total params: 4057437 (15.48 MB)
Trainable params: 4057437 (15.48 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [137]:
def predict_text(start_text, size):
  """Generate `size` characters continuing `start_text`.

  Each step feeds the previously produced character (initially the whole
  prompt) together with the carried state into `one_step_model`.

  Returns:
    The prompt followed by the generated characters, as a Python `str`.
  """
  current = tf.constant([start_text])
  pieces = [current]
  states = None

  for _ in range(size):
    current, states = one_step_model.generate_one_step(current, states=states)
    pieces.append(current)

  # Concatenate the prompt and every sampled character element-wise, then
  # take the single batch entry and decode its bytes.
  joined = tf.strings.join(pieces)
  return joined[0].numpy().decode('utf-8')

def sample_text():
  """Print a 40-character sample for a fixed prompt and how long it took."""
  started_at = time.time()
  rule = '-' * 80
  print('Sample:')
  print(rule)
  print(predict_text('И сказал Господь:', 40))
  print(rule)
  elapsed = time.time() - started_at
  print('\nRun time:', elapsed)

@tf.function
def train_step(inputs):
  """Run one optimization step on a single `(inputs, labels)` batch.

  Returns:
    A dict with the batch loss under the key 'loss'.
  """
  features, targets = inputs
  with tf.GradientTape() as tape:
    logits = model(features, training=True)
    loss = loss_func(targets, logits)

  # Differentiate the loss w.r.t. the model's trainable variables and let
  # the optimizer apply the update.
  variables = model.trainable_variables
  gradients = tape.gradient(loss, variables)
  optimizer.apply_gradients(zip(gradients, variables))

  return {'loss': loss}

In [138]:
def train(dataset):
  """Train the global `model` for EPOCHS epochs of STEPS_PER_EPOCH steps.

  After every epoch a text sample, the mean epoch loss and the epoch
  duration are printed; weights are saved every CHECKPOINT_INTERVAL epochs.

  Args:
    dataset: Iterable of `(input, target)` batches. A single iterator is
      consumed across all epochs, so it must yield at least
      EPOCHS * STEPS_PER_EPOCH batches (e.g. a repeated dataset).
  """
  mean = tf.metrics.Mean()
  train_iterator = iter(dataset)

  for epoch in range(1, EPOCHS + 1):
    start = time.time()

    print('Epoch: {}/{}'.format(epoch, EPOCHS))
    pbar = tqdm(range(STEPS_PER_EPOCH))

    # Start each epoch's running mean from scratch (reset_state replaces
    # the deprecated reset_states).
    mean.reset_state()
    for step in pbar:
      inp, target = next(train_iterator)
      logs = train_step([inp, target])
      mean.update_state(logs['loss'])
      pbar.set_postfix({
        'loss': round(float(logs['loss']), 4)
      })
      pbar.set_description("Current step %s" % step)

    # Save (checkpoint) the model weights every CHECKPOINT_INTERVAL epochs.
    if epoch % CHECKPOINT_INTERVAL == 0:
      model.save_weights(CHECKPOINT_PREFIX.format(epoch=epoch))

    sample_text()

    print('Loss: {}'.format(round(float(mean.result().numpy()), 4)))
    print('Time for epoch {} is {} sec'.format(epoch, time.time()-start))

In [139]:
# Run the full training loop on the shuffled, batched, repeated dataset.
train(dataset)

Epoch: 1/30


Current step 648: 100%|██████████| 649/649 [00:41<00:00, 15.76it/s, loss=1.6]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: опеявшей устребит ли в себе вас, как и 
--------------------------------------------------------------------------------

Run time: 3.889526844024658
Loss: 2.1307
Time for epoch 1 is 45.069196939468384 sec
Epoch: 2/30


Current step 648: 100%|██████████| 649/649 [00:38<00:00, 16.64it/s, loss=1.44]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: защащайте до них. Кто Ты твой знание, н
--------------------------------------------------------------------------------

Run time: 0.28116703033447266
Loss: 1.4676
Time for epoch 2 is 39.28883218765259 sec
Epoch: 3/30


Current step 648: 100%|██████████| 649/649 [00:40<00:00, 16.19it/s, loss=1.3]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: вот, вы выше небес.

30 Пребывает Симон
--------------------------------------------------------------------------------

Run time: 0.16859722137451172
Loss: 1.3241
Time for epoch 3 is 40.25928521156311 sec
Epoch: 4/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.52it/s, loss=1.25]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: если бы ты не послушаешь долготерпеть п
--------------------------------------------------------------------------------

Run time: 0.17113757133483887
Loss: 1.2514
Time for epoch 4 is 39.46820378303528 sec
Epoch: 5/30


Current step 648: 100%|██████████| 649/649 [00:38<00:00, 16.79it/s, loss=1.22]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: за мратит предавшись слепым чудесами Св
--------------------------------------------------------------------------------

Run time: 0.1932380199432373
Loss: 1.2003
Time for epoch 5 is 38.99290919303894 sec
Epoch: 6/30


Current step 648: 100%|██████████| 649/649 [00:38<00:00, 16.74it/s, loss=1.18]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: виноградную в тело Мне так, что даете в
--------------------------------------------------------------------------------

Run time: 0.29003310203552246
Loss: 1.1593
Time for epoch 6 is 39.08650994300842 sec
Epoch: 7/30


Current step 648: 100%|██████████| 649/649 [00:38<00:00, 16.70it/s, loss=1.13]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: идите к сыну твоему; но Я наведу Избави
--------------------------------------------------------------------------------

Run time: 0.11163830757141113
Loss: 1.1241
Time for epoch 7 is 38.97584295272827 sec
Epoch: 8/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.56it/s, loss=1.1]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: вот, теперь не давайте господам, без пр
--------------------------------------------------------------------------------

Run time: 0.10673403739929199
Loss: 1.0924
Time for epoch 8 is 39.313180685043335 sec
Epoch: 9/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.56it/s, loss=1.12]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: вот, раба Моего Духом землями?

5 А ты 
--------------------------------------------------------------------------------

Run time: 0.2162625789642334
Loss: 1.065
Time for epoch 9 is 39.427128076553345 sec
Epoch: 10/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.64it/s, loss=1.05]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: спрашивай: если исполнители заповедей Е
--------------------------------------------------------------------------------

Run time: 0.22267866134643555
Loss: 1.0404
Time for epoch 10 is 39.36917972564697 sec
Epoch: 11/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.58it/s, loss=1.06]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: стой тучных миртов, или нечестивец, да 
--------------------------------------------------------------------------------

Run time: 0.11688423156738281
Loss: 1.019
Time for epoch 11 is 39.27136421203613 sec
Epoch: 12/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.60it/s, loss=1.03]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: так в те дни, и нашел шест святого дня 
--------------------------------------------------------------------------------

Run time: 0.10583305358886719
Loss: 1.0009
Time for epoch 12 is 39.219953298568726 sec
Epoch: 13/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.55it/s, loss=1.06]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: сего тех, которые вы поступаете с Перво
--------------------------------------------------------------------------------

Run time: 0.11270999908447266
Loss: 0.9859
Time for epoch 13 is 39.327078104019165 sec
Epoch: 14/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.59it/s, loss=1.01]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: Господи! кто Мой будешь, и все, возлюбл
--------------------------------------------------------------------------------

Run time: 0.15707182884216309
Loss: 0.9733
Time for epoch 14 is 39.27856707572937 sec
Epoch: 15/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.63it/s, loss=1.03]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: течет хотите Вассадовала, тотчас постиг
--------------------------------------------------------------------------------

Run time: 0.22305846214294434
Loss: 0.9632
Time for epoch 15 is 39.37010431289673 sec
Epoch: 16/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.54it/s, loss=0.986]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: Я Сей я виновно; ибо с тобою Шув].

28 
--------------------------------------------------------------------------------

Run time: 0.10647869110107422
Loss: 0.9551
Time for epoch 16 is 39.34582543373108 sec
Epoch: 17/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.57it/s, loss=0.987]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: просиму вам глаза и скорчились мудрые.

--------------------------------------------------------------------------------

Run time: 0.15164613723754883
Loss: 0.95
Time for epoch 17 is 39.31575655937195 sec
Epoch: 18/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.50it/s, loss=0.961]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: одна ли она благоугодно, то и делающие 
--------------------------------------------------------------------------------

Run time: 0.2366635799407959
Loss: 0.9451
Time for epoch 18 is 39.588895082473755 sec
Epoch: 19/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.58it/s, loss=1.01]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: и Я превратился в Ефес?

4 Нераажув рук
--------------------------------------------------------------------------------

Run time: 0.1662147045135498
Loss: 0.9431
Time for epoch 19 is 39.32254242897034 sec
Epoch: 20/30


Current step 648: 100%|██████████| 649/649 [00:38<00:00, 16.68it/s, loss=1.02]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: истреблю к вашего тому, кто избавит его
--------------------------------------------------------------------------------

Run time: 0.1810452938079834
Loss: 0.9406
Time for epoch 20 is 39.1776020526886 sec
Epoch: 21/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.53it/s, loss=0.966]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: пойдем все то, обращаясь.

24 Помышлять
--------------------------------------------------------------------------------

Run time: 0.1624903678894043
Loss: 0.9407
Time for epoch 21 is 39.43990087509155 sec
Epoch: 22/30


Current step 648: 100%|██████████| 649/649 [00:38<00:00, 16.65it/s, loss=0.982]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: а ты поступлена дать тебе, а истоптал н
--------------------------------------------------------------------------------

Run time: 0.1083228588104248
Loss: 0.9414
Time for epoch 22 is 39.09136652946472 sec
Epoch: 23/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.47it/s, loss=0.971]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: не забывай Меня, говорит Господь; и ист
--------------------------------------------------------------------------------

Run time: 0.11270976066589355
Loss: 0.9421
Time for epoch 23 is 39.536860942840576 sec
Epoch: 24/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.56it/s, loss=0.978]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: встав и поклонясь все и услышал, что не
--------------------------------------------------------------------------------

Run time: 0.11475777626037598
Loss: 0.9441
Time for epoch 24 is 39.305697441101074 sec
Epoch: 25/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.62it/s, loss=0.992]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: клясть творящего вино;

6 и как он услы
--------------------------------------------------------------------------------

Run time: 0.09719276428222656
Loss: 0.9477
Time for epoch 25 is 39.20631003379822 sec
Epoch: 26/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.54it/s, loss=1.01]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: правду, областный в законе и живых, по 
--------------------------------------------------------------------------------

Run time: 0.1129603385925293
Loss: 0.9495
Time for epoch 26 is 39.366700887680054 sec
Epoch: 27/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.55it/s, loss=1.01]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: истинно Я нашел в нем в ноздри, потому 
--------------------------------------------------------------------------------

Run time: 0.11366009712219238
Loss: 0.9547
Time for epoch 27 is 39.330199003219604 sec
Epoch: 28/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.52it/s, loss=0.98]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: книги Ты наказание Твое,

23 ибо я увер
--------------------------------------------------------------------------------

Run time: 0.15414881706237793
Loss: 0.9602
Time for epoch 28 is 39.44335603713989 sec
Epoch: 29/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.56it/s, loss=0.988]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: превосходится тебе три тысячи волос на 
--------------------------------------------------------------------------------

Run time: 0.22824430465698242
Loss: 0.9631
Time for epoch 29 is 39.42437267303467 sec
Epoch: 30/30


Current step 648: 100%|██████████| 649/649 [00:39<00:00, 16.58it/s, loss=0.999]


Sample:
--------------------------------------------------------------------------------
И сказал Господь: истребление было такое случиться избыто
--------------------------------------------------------------------------------

Run time: 0.2166755199432373
Loss: 0.9676
Time for epoch 30 is 39.49124217033386 sec


In [140]:
# Generate and print a 100-character continuation of the prompt.
print(predict_text('И сказал Господь:', 100))

И сказал Господь: псотушите Мне. Они же, мы все погибли с ними.

14 И кто ступится поручить тебя, Иуда, узнав образу 
