In [24]:
# 영화 리뷰를 보고 긍정/부정 판별

In [2]:
import os, pathlib, shutil, random
from tensorflow import keras

batch_size = 32
base_dir = pathlib.Path('aclImdb')
val_dir = base_dir/'val'
train_dir = base_dir/'train'
for category in ('neg', 'pos'):
    os.makedirs(val_dir/category, exist_ok=True)
    files = os.listdir(train_dir / category)
    random.Random(42).shuffle(files)
    num_val_samples = int(0.2*len(files))
    val_files = files[-num_val_samples:]
    for fname in val_files:
        shutil.move(train_dir / category / fname, val_dir / category / fname)

train_ds = keras.utils.text_dataset_from_directory('aclImdb/train', batch_size=batch_size)
val_ds = keras.utils.text_dataset_from_directory('aclImdb/val', batch_size=batch_size)
test_ds = keras.utils.text_dataset_from_directory('aclImdb/test', batch_size=batch_size)

text_only_train_ds = train_ds.map(lambda x, y: x)

Found 55244 files belonging to 3 classes.
Found 19756 files belonging to 2 classes.
Found 25000 files belonging to 2 classes.


In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class TransformerEncoder(layers.Layer):
    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads

        # keras 멀티헤드 어텐션
        self.attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim) 
       
        # 두 개의 Dense로 이루어진 FFN(피드 포워드 네트워크)
        self.dense_proj = keras.Sequential([layers.Dense(dense_dim, activation='relu'), layers.Dense(embed_dim)]) 

        # 정규화
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()

    def call(self, inputs, mask=None):
        if mask is not None: # 패딩 마스크
            mask = mask[:, tf.newaxis, :]

        attention_output = self.attention(inputs, inputs, attention_mask = mask) # 멀티헤드 어텐션
        proj_input = self.layernorm_1(inputs + attention_output) # 정규화
        proj_output = self.dense_proj(proj_input) # FFN
        return self.layernorm_2(proj_input + proj_output) 

    def get_config(self): # 레이어의 설정을 반환
        config = super().get_config()
        config.update({'embed_dim': self.embed_dim, 'num_heads': self.num_heads, 'dense_dim': self.dense_dim})
        return config            

In [4]:
vocab_size = 20000
embed_dim = 256
num_heads = 2
dense_dim = 32

inputs = keras.Input(shape=(None,), dtype='int64')
x = layers.Embedding(vocab_size, embed_dim)(inputs)
x = TransformerEncoder(embed_dim, dense_dim, num_heads)(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dropout(0.5)(x)

outputs = layers.Dense(1, activation='sigmoid')(x)
model = keras.Model(inputs, outputs)
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding (Embedding)       (None, None, 256)         5120000   
                                                                 
 transformer_encoder (Transf  (None, None, 256)        543776    
 ormerEncoder)                                                   
                                                                 
 global_max_pooling1d (Globa  (None, 256)              0         
 lMaxPooling1D)                                                  
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_2 (Dense)             (None, 1)                 257   

In [5]:
max_length = 600
max_tokens = 20000

text_vectorization = layers.TextVectorization(max_tokens=max_tokens, output_mode='int', output_sequence_length=max_length)
text_vectorization.adapt(text_only_train_ds)

int_train_ds = train_ds.map(lambda x, y: (text_vectorization(x), y), num_parallel_calls=4)
int_val_ds = val_ds.map(lambda x, y: (text_vectorization(x), y), num_parallel_calls=4)
int_test_ds = test_ds.map(lambda x, y: (text_vectorization(x), y), num_parallel_calls=4)

2024-07-18 09:43:34.498971: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [55244]
	 [[{{node Placeholder/_4}}]]
2024-07-18 09:43:34.499229: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [55244]
	 [[{{node Placeholder/_4}}]]
2024-07-18 09:43:34.505684: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In [None]:
callbacks = [keras.callbacks.ModelCheckpoint('transformer_encoder.keras', save_best_only=True)]

model.fit(int_train_ds, validation_data = int_val_ds, epochs=5, callbacks=callbacks)

Epoch 1/5


2024-07-18 00:30:54.796915: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_20' with dtype int64
	 [[{{node Placeholder/_20}}]]
2024-07-18 00:30:54.797198: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_20' with dtype int64
	 [[{{node Placeholder/_20}}]]




2024-07-18 01:21:20.563958: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_20' with dtype int64
	 [[{{node Placeholder/_20}}]]
2024-07-18 01:21:20.564464: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [18446]
	 [[{{node Placeholder/_0}}]]


Epoch 2/5
 139/1768 [=>............................] - ETA: 43:51 - loss: -9211.8955 - acc: 0.0558

In [6]:
model = keras.models.load_model('transformer_encoder.keras', custom_objects={'TransformerEncoder': TransformerEncoder})
print('test acc : ', model.evaluate(int_test_ds)[1])

2024-07-18 09:43:41.681261: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_19' with dtype string
	 [[{{node Placeholder/_19}}]]
2024-07-18 09:43:41.681550: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_17' with dtype resource
	 [[{{node Placeholder/_17}}]]


  6/782 [..............................] - ETA: 13:29 - loss: 4701.3496 - acc: 0.5469

KeyboardInterrupt: 

In [7]:
class PositionalEmbedding(layers.Layer):
    def __init__(self, sequence_length, input_dim, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.token_embeddings = layers.Embedding(input_dim = input_dim, output_dim = output_dim) # 입력 시퀸스 토큰을 벡터화
        self.position_embeddings = layers.Embedding(input_dim = sequence_length, output_dim = output_dim) # 시퀸스의 위치를 벡터화
        self.sequence_length = sequence_length
        self.input_dim = input_dim
        self.output_dim = output_dim

    def call(self, inputs):
        length = tf.shape(inputs)[-1] # 입력 시퀸스의 길이
        positions = tf.range(start=0, limit=length, delta=1) # 위치 인덱스 생성
        embedded_tokens = self.token_embeddings(inputs)
        embedded_positions = self.position_embeddings(positions)
        return embedded_tokens + embedded_positions # 위치정보가 포함된 입력 시퀸스 임베딩을 반환
        
    #def compute_mask(self, inputs, mask=None):
    #    return tf.math.not_equal(inputs, 0) # 0이 아닌 경우 True, 맞을 경우 False

    def get_config(self):
        config = super().get_config()
        config.update({'output_dim': self.output_dim, 'sequence_length': self.sequence_length, 'input_dim': self.input_dim})
        return config

In [8]:
# 사람이 많고 문이 하나일 때는 한 명씩 들어와서 순서 정보가 중요하지 않지만,
# 만약 문이 정말 넓다면, 들어가는 순서를 예측할 수 없다.(의미가 없어진다.)
# 따라서 직접 위치 정보를 포함시켜 전송시켜주도록 구성해준다.(서로 어느 위치에 있었고 어느 키워드 옆 쪽에 있었는지 등)
# 해당 토큰에 대한 예측이 더욱 수월해진다.

In [9]:
vocab_size = 20000
sequence_length = 600
embed_dim = 256
num_heads = 2
dense_dim = 32

inputs = keras.Input(shape=(None,), dtype='int64')
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(inputs)
x = TransformerEncoder(embed_dim, dense_dim, num_heads)(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dropout(0.5)(x)

outputs = layers.Dense(1, activation='sigmoid')(x)
model = keras.Model(inputs, outputs)

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, None)]            0         
                                                                 
 positional_embedding (Posit  (None, None, 256)        5273600   
 ionalEmbedding)                                                 
                                                                 
 transformer_encoder_1 (Tran  (None, None, 256)        543776    
 sformerEncoder)                                                 
                                                                 
 global_max_pooling1d_1 (Glo  (None, 256)              0         
 balMaxPooling1D)                                                
                                                                 
 dropout_1 (Dropout)         (None, 256)               0         
                                                           

In [None]:
callbacks = [keras.callbacks.ModelCheckpoint('positional_transformer_encoder.keras', save_best_only=True)]

model.fit(int_train_ds, validation_data=int_val_ds, epochs=5, callbacks=callbacks)

Epoch 1/5


2024-07-18 09:43:54.868000: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [55244]
	 [[{{node Placeholder/_4}}]]
2024-07-18 09:43:54.868306: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_19' with dtype string
	 [[{{node Placeholder/_19}}]]


  38/1727 [..............................] - ETA: 1:33:48 - loss: -54.4104 - acc: 0.0518

In [None]:
model = keras.models.load_model('pull_transformer_encoder.keras', custom_objects={'TransformerEncoder': TransformerEncoder,
                                                                                  'PositionalEmbedding': PositionalEmbedding})
print('test acc : ', model.evaluate(int_test_ds)[1])