<a href="https://colab.research.google.com/github/vitamingyu/NLP-LLM/blob/main/tf_48Transformer_basic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# imdb dataset으로 감성 분류
# Transformer는 입력 데이터들간의 상호작용을 고려하는 self-attention

import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.models import Sequential

In [22]:
#encoder block
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Attention
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)  # sequence에 따른 고정 길이 정규화로 batch normalization에 비해 언어 모델
    x = layers.MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(x, x)
    res = x + inputs

    # Feed Forward NN
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation='relu')(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res

def build_sentment_model(input_shape, head_size, num_heads, ff_dim, num_transformer_blocks, mlp_units, dropout=0, mlp_dropout=0):
    inputs = keras.Input(shape=input_shape)
    embedding_layer=layers.Embedding(input_dim=1000, output_dim=64, input_length=input_shape[0])(inputs)
    x = embedding_layer + tf.random.normal(shape=tf.shape(embedding_layer))
    # Transformer blocks
    for _ in range(num_transformer_blocks):
      x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    x = layers.GlobalAveragePooling1D()(x)  # 공간차원 축소

    #MLP
    for dim in mlp_units:
      x = layers.Dense(dim, activation = 'relu')(x)
      x = layers.Dropout(mlp_dropout)(x)

    outputs = layers.Dense(1, activation='sigmoid')(x)

    return keras.Model(inputs = inputs, outputs=outputs)

In [24]:
(x_train, y_train),(x_val, y_val) = keras.datasets.imdb.load_data(num_words=10000)
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=100)
x_val = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=100)

# example usage
input_shape=(100,)
head_size = 256
num_heads = 4
ff_dim = 4
num_transformer_blocks = 4
mlp_units = [128]
dropout = 0.25
mlp_dropout=0.25

model = build_sentment_model(
    input_shape, head_size, num_heads, ff_dim, num_transformer_blocks, mlp_units, dropout, mlp_dropout
)

print(model.summary())

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_9 (InputLayer)        [(None, 100)]                0         []                            
                                                                                                  
 embedding_8 (Embedding)     (None, 100, 64)              64000     ['input_9[0][0]']             
                                                                                                  
 tf.compat.v1.shape_8 (TFOp  (3,)                         0         ['embedding_8[0][0]']         
 Lambda)                                                                                          
                                                                                                  
 tf.random.normal_8 (TFOpLa  (None, 100, 64)              0         ['tf.compat.v1.shape_8[0