In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import tensorflow as tf
import tensorflow_datasets as tfds
from keras.preprocessing.sequence import pad_sequences
from nlp_utils import preprocess_sentence, TextTokenizing
from transformer import transformer, CustomSchedule, loss_function

In [2]:
train_data = pd.read_csv("./final_dataset.csv")
train_data.head()

Unnamed: 0,Q,A
0,안녕하세요,️️
1,이거 해봐요><,"나의 직장인 멘탈 성향은 [안챙겨도 잘커요, 탕비실 선인장] 당신의 멘탈 성향은 ..."
2,오 ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋ오 ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋ 안챙겨도 잘커요 너무 맞는데요ㅜㅜ? 자...,ㅋㅌㅋㅋㅋㅋㅋㅌㅋㅋㅋㅋ 아녜여 챙겨주세요
3,ㅋㅋㅋㅋㅋ당연하쥬ㅋㅋㅋㅋㅋ당연하쥬 누굴 챙길 여유는 저도 없는거같지만 그러나 점심 ...,그렇게 큰 권한을 주신다구요??그렇게 큰 권한을 주신다구요?? name1님 완전 대인배
4,"목요일 점심메뉴도 생각해오세요 크크 전 닭가슴살 먹을거지만,,",흠 그럼 저도흠 그럼 저도 한번 도시락을 싸올까요


In [3]:
questions = [preprocess_sentence(q) for q in train_data["Q"]]
answers = [preprocess_sentence(a) for a in train_data["A"]]


print(questions[:3])
print(questions[:3])

['안녕하세요', '이거 해봐요><', '오 ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋ오 ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋ 안챙겨도 잘커요 너무 맞는데요ㅜㅜ ? 자세한 내용은 더 알아가야겟지만~~']
['안녕하세요', '이거 해봐요><', '오 ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋ오 ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋ 안챙겨도 잘커요 너무 맞는데요ㅜㅜ ? 자세한 내용은 더 알아가야겟지만~~']


# For Small Model -> Small Dataset

In [4]:
questions = questions[:50000]
answers = answers[:50000]

In [48]:
tokenizer = tfds.deprecated.text.SubwordTextEncoder.build_from_corpus(
    questions+answers, target_vocab_size=2**16
)

In [49]:
tokenizer.save_to_file("super_small_vocab")

In [5]:
tokenizer = tfds.deprecated.text.SubwordTextEncoder.load_from_file("super_small_vocab")

In [6]:
START_TOKEN, END_TOKEN = [tokenizer.vocab_size], [tokenizer.vocab_size+1]

VOCAB_SIZE = tokenizer.vocab_size + 2

START_TOKEN, END_TOKEN

([62144], [62145])

In [7]:
MAX_LENGTH = 50

def tokenize_and_filter(questions, answers):
    tokenized_inputs, tokenized_outputs = [], []

    for (input, output) in zip(questions, answers):
        input = START_TOKEN + tokenizer.encode(input) + END_TOKEN
        output = START_TOKEN + tokenizer.encode(output) + END_TOKEN

        tokenized_inputs.append(input)
        tokenized_outputs.append(output)
    
    tokenized_inputs = pad_sequences(tokenized_inputs, maxlen=MAX_LENGTH, padding="post")
    tokenized_outputs = pad_sequences(tokenized_outputs, maxlen=MAX_LENGTH, padding="post")

    return tokenized_inputs, tokenized_outputs

In [8]:
questions, answers = tokenize_and_filter(questions, answers)

In [9]:
print(f'질문 데이터의 크기:{questions.shape}')
print(f'답변 데이터의 크기:{answers.shape}')

질문 데이터의 크기:(50000, 50)
답변 데이터의 크기:(50000, 50)


In [10]:
# 텐서플로우 dataset을 이용하여 셔플(shuffle)을 수행하되, 배치 크기로 데이터를 묶는다.
# 또한 이 과정에서 교사 강요(teacher forcing)을 사용하기 위해서 디코더의 입력과 실제값 시퀀스를 구성한다.
BATCH_SIZE = 64
BUFFER_SIZE = 20000

# Decoder real sequence has to remove <SOS> token
dataset = tf.data.Dataset.from_tensor_slices((
    {
        'inputs': questions,
        'dec_inputs': answers[:, :-1] # decoder input. Last Padding Token removed
    },
    {
        'outputs': answers[:, 1:] # First Token removed. <sos> token gone
    }
))

dataset = dataset.cache()
dataset = dataset.shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

In [11]:
# 임의의 샘플에 대해서 [:, :-1]과 [:, 1:]이 어떤 의미를 가지는지 테스트해본다.
print(answers[0]) # 기존 샘플
print(answers[:1][:, :-1]) # 마지막 패딩 토큰 제거하면서 길이가 39가 된다.
print(answers[:1][:, 1:]) # 맨 처음 토큰이 제거된다. 다시 말해 시작 토큰이 제거된다. 길이는 역시 39가 된다.

[62144 14444 62145     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0]
[[62144 14444 62145     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0]]
[[14444 62145     0     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0]]


In [None]:
from hyperparameters import NUM_LAYERS, D_MODEL, NUM_HEADS, DFF, DROPOUT

In [13]:
tf.keras.backend.clear_session()

In [14]:
model = transformer(
    vocab_size=VOCAB_SIZE,
    num_layers=NUM_LAYERS,
    dff=DFF,
    d_model=D_MODEL,
    num_heads=NUM_HEADS,
    dropout=DROPOUT)

(1, 62146, 256)
(1, 62146, 256)


In [15]:
model.summary()

Model: "transformer"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 inputs (InputLayer)            [(None, None)]       0           []                               
                                                                                                  
 dec_inputs (InputLayer)        [(None, None)]       0           []                               
                                                                                                  
 enc_padding_mask (Lambda)      (None, 1, 1, None)   0           ['inputs[0][0]']                 
                                                                                                  
 encoder (Functional)           (None, None, 256)    16963584    ['inputs[0][0]',                 
                                                                  'enc_padding_mask[0][0

In [16]:
import os

# Checkpoint
checkpoint_path = "training_small/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# save weights in each five epochs
# cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, verbose=1, save_weights_only=True, save_best_only=True)
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, verbose=1, save_weights_only=True, save_freq=3)

# model.save_weights(checkpoint_path.format(epoch=0))

In [17]:
learning_rate = CustomSchedule(D_MODEL)

optimizer = tf.keras.optimizers.Adam(
    learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

def accuracy(y_true, y_pred):
  # ensure labels have shape (batch_size, MAX_LENGTH - 1)
  y_true = tf.reshape(y_true, shape=(-1, MAX_LENGTH - 1))
  return tf.keras.metrics.sparse_categorical_accuracy(y_true, y_pred)

model.compile(optimizer=optimizer, loss=loss_function, metrics=[accuracy])

In [18]:
EPOCHS = 40
model.fit(dataset, epochs=EPOCHS, callbacks=[cp_callback])

Epoch 1/40
  2/782 [..............................] - ETA: 4:41 - loss: 2.5362 - accuracy: 0.0000e+00   
Epoch 1: saving model to training_small\cp-0001.ckpt
  5/782 [..............................] - ETA: 10:29 - loss: 2.6307 - accuracy: 0.0000e+00
Epoch 1: saving model to training_small\cp-0001.ckpt
  8/782 [..............................] - ETA: 12:04 - loss: 2.5406 - accuracy: 0.0000e+00
Epoch 1: saving model to training_small\cp-0001.ckpt
 11/782 [..............................] - ETA: 12:26 - loss: 2.5923 - accuracy: 0.0000e+00
Epoch 1: saving model to training_small\cp-0001.ckpt
 14/782 [..............................] - ETA: 12:15 - loss: 2.5869 - accuracy: 0.0000e+00
Epoch 1: saving model to training_small\cp-0001.ckpt
 17/782 [..............................] - ETA: 12:07 - loss: 2.6148 - accuracy: 0.0000e+00
Epoch 1: saving model to training_small\cp-0001.ckpt
 20/782 [..............................] - ETA: 11:57 - loss: 2.6214 - accuracy: 0.0000e+00
Epoch 1: saving model to 

ResourceExhaustedError: Graph execution error:

Detected at node 'transformer/outputs/Tensordot/MatMul' defined at (most recent call last):
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
      app.start()
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\ipykernel\kernelapp.py", line 677, in start
      self.io_loop.start()
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\asyncio\base_events.py", line 596, in run_forever
      self._run_once()
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\asyncio\base_events.py", line 1890, in _run_once
      handle._run()
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\ipykernel\kernelbase.py", line 457, in dispatch_queue
      await self.process_one()
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\ipykernel\kernelbase.py", line 446, in process_one
      await dispatch(*args)
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\ipykernel\kernelbase.py", line 353, in dispatch_shell
      await result
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\ipykernel\kernelbase.py", line 648, in execute_request
      reply_content = await reply_content
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\ipykernel\ipkernel.py", line 353, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
      return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\IPython\core\interactiveshell.py", line 2914, in run_cell
      result = self._run_cell(
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\IPython\core\interactiveshell.py", line 2960, in _run_cell
      return runner(coro)
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\IPython\core\async_helpers.py", line 78, in _pseudo_sync_runner
      coro.send(None)
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\IPython\core\interactiveshell.py", line 3185, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
      if (await self.run_code(code, result,  async_=asy)):
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\YSH\AppData\Local\Temp/ipykernel_1808/449592082.py", line 2, in <module>
      model.fit(dataset, epochs=EPOCHS, callbacks=[cp_callback])
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py", line 1370, in fit
      tmp_logs = self.train_function(iterator)
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py", line 1013, in train_function
      return step_function(self, iterator)
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py", line 1002, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py", line 992, in run_step
      outputs = model.train_step(data)
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py", line 851, in train_step
      y_pred = self(x, training=True)
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\keras\engine\base_layer.py", line 993, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\keras\engine\functional.py", line 450, in call
      return self._run_internal_graph(
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\keras\engine\functional.py", line 588, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\keras\engine\base_layer.py", line 993, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\YSH\anaconda3\envs\mlenv\lib\site-packages\keras\layers\core\dense.py", line 222, in call
      outputs = tf.tensordot(inputs, self.kernel, [[rank - 1], [0]])
Node: 'transformer/outputs/Tensordot/MatMul'
OOM when allocating tensor with shape[3136,62146] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node transformer/outputs/Tensordot/MatMul}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_13660]