<a href="https://colab.research.google.com/github/vitamingyu/NLP-LLM/blob/main/tf_48Transformer_basic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# imdb dataset으로 감성 분류
# Transformer는 입력 데이터들간의 상호작용을 고려하는 self-attention

import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.models import Sequential

In [2]:
#encoder block
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Attention
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)  # sequence에 따른 고정 길이 정규화로 batch normalization에 비해 언어 모델
    x = layers.MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(x, x)
    res = x + inputs

    # Feed Forward NN
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation='relu')(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res

def build_sentment_model(input_shape, head_size, num_heads, ff_dim, num_transformer_blocks, mlp_units, dropout=0, mlp_dropout=0):
    inputs = keras.Input(shape=input_shape)
    embedding_layer=layers.Embedding(input_dim=1000, output_dim=64, input_length=input_shape[0])(inputs)
    x = embedding_layer + tf.random.normal(shape=tf.shape(embedding_layer))
    # Transformer blocks
    for _ in range(num_transformer_blocks):
      x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    x = layers.GlobalAveragePooling1D()(x)  # 공간차원 축소

    #MLP
    for dim in mlp_units:
      x = layers.Dense(dim, activation = 'relu')(x)
      x = layers.Dropout(mlp_dropout)(x)

    outputs = layers.Dense(1, activation='sigmoid')(x)

    return keras.Model(inputs = inputs, outputs=outputs)

In [3]:
(x_train, y_train),(x_val, y_val) = keras.datasets.imdb.load_data(num_words=10000)
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=100)
x_val = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=100)

# example usage
input_shape=(100,)
head_size = 256
num_heads = 4
ff_dim = 4
num_transformer_blocks = 4
mlp_units = [128]
dropout = 0.25
mlp_dropout=0.25

model = build_sentment_model(
    input_shape, head_size, num_heads, ff_dim, num_transformer_blocks, mlp_units, dropout, mlp_dropout
)

print(model.summary())

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 100)]                0         []                            
                                                                                                  
 embedding (Embedding)       (None, 100, 64)              64000     ['input_1[0][0]']             
                                                                                                  
 tf.compat.v1.shape (TFOpLa  (3,)                         0         ['embedding[0][0]']           
 mbda)                                                                                            
                                                                                                  
 tf

In [4]:
model.compile(loss='binary_crossentropy', optimizer=keras.optimizers.Adam(learning_rate=1e-4),metrics=['acc'])

history = model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs = 10, batch_size=32, verbose=2)
print(history.history)

Epoch 1/10
782/782 - 91s - loss: 0.6955 - acc: 0.5031 - val_loss: 0.6934 - val_acc: 0.5077 - 91s/epoch - 116ms/step
Epoch 2/10
782/782 - 40s - loss: 0.6893 - acc: 0.5355 - val_loss: 0.6782 - val_acc: 0.5706 - 40s/epoch - 52ms/step
Epoch 3/10
782/782 - 38s - loss: 0.6658 - acc: 0.5963 - val_loss: 0.6488 - val_acc: 0.6202 - 38s/epoch - 48ms/step
Epoch 4/10
782/782 - 37s - loss: 0.6390 - acc: 0.6352 - val_loss: 0.6256 - val_acc: 0.6462 - 37s/epoch - 48ms/step
Epoch 5/10
782/782 - 54s - loss: 0.6154 - acc: 0.6654 - val_loss: 0.6063 - val_acc: 0.6688 - 54s/epoch - 69ms/step
Epoch 6/10
782/782 - 36s - loss: 0.5960 - acc: 0.6814 - val_loss: 0.5866 - val_acc: 0.6856 - 36s/epoch - 46ms/step
Epoch 7/10
782/782 - 36s - loss: 0.5765 - acc: 0.6980 - val_loss: 0.5743 - val_acc: 0.6952 - 36s/epoch - 46ms/step
Epoch 8/10
782/782 - 38s - loss: 0.5634 - acc: 0.7085 - val_loss: 0.5522 - val_acc: 0.7149 - 38s/epoch - 49ms/step
Epoch 9/10
782/782 - 41s - loss: 0.5413 - acc: 0.7236 - val_loss: 0.5392 - val_

In [12]:
text_sample = ['I love movie', "This movie sucks, it's so boring"]
max_len = 100
tok = keras.datasets.imdb.get_word_index()
text_sequence = [[tok[word] if word in tok else 0 for word in sample.split()] for sample in text_sample]
text_sequence = keras.preprocessing.sequence.pad_sequences(text_sequence, maxlen=max_len)
print(text_sequence)

pred = model.predict(text_sequence)

b_pred = (pred > 0.5 ).astype(int)

for i, samp in enumerate(text_sample):
  print(f'sample : {samp}')
  print(f'predicted : {"Positive" if b_pred[i] == 1 else "Negative"}')
  print(f"confidencs : {pred[i][0]:.3f}\n")

[[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0 116  17]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0  17   0  42  35 354]]
sample : I love movie
predicted : Positive
confidencs : 0.652

sample : This movie sucks, it's so boring
predicted : Negative
confidencs : 0.455

