In [1]:
!wget https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
!tar -xf aclImdb_v1.tar.gz

--2023-12-20 05:51:15--  https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
Resolving ai.stanford.edu (ai.stanford.edu)... 171.64.68.10
Connecting to ai.stanford.edu (ai.stanford.edu)|171.64.68.10|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 84125825 (80M) [application/x-gzip]
Saving to: ‘aclImdb_v1.tar.gz.1’


2023-12-20 05:51:17 (40.0 MB/s) - ‘aclImdb_v1.tar.gz.1’ saved [84125825/84125825]



In [3]:
import tensorflow as tf

print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    # 메모리 사용 제한을 위한 설정
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    # 특정 GPU만 사용하도록 설정
    tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
  except RuntimeError as e:
    # 프로그램 시작 후에는 GPU 설정을 변경할 수 없으므로
    # 런타임 오류 발생 시 예외 처리가 필요함
    print(e)

Num GPUs Available:  1


In [15]:
import tensorflow as tf
from tensorflow import keras
dataset = keras.utils.text_dataset_from_directory(
    directory="aclImdb", label_mode=None, batch_size=256)
dataset = dataset.map(lambda x: tf.strings.regex_replace(x, "<br />", " "))

Found 105006 files belonging to 1 classes.


In [16]:
for data_batch in dataset.take(1):
    print(data_batch)

tf.Tensor(
[b"The planning episodes were a bit dull, but when they reached the desert it was quite fun to watch. The reason why I call it the most realistic reality show is because, much to my surprise,Charley fell out of the race relatively early. When his hands were sore, I expected the usual stress and then a miracle fix, but instead he actually quit the race. The most anxious moment of the show must've been when Max was stuck out in the desert with almost no water or food! The ending was great and I was very happy to see at least one of the team make it. Overall, not as great as the Long Way Round, but definitely an interesting watch, as one gets a peek into the most challenging race in the world."
 b'Dwight Yoakam must be a hell of a guy. He\'s personable in interviews, plays and sings his own music with panache, and did a great job of acting in "Slingblade". He\'s such a cool guy that he was able to get a lot of his friends to be in a really bad Western.  I love a good Western. T

In [17]:
from tensorflow.keras.layers import TextVectorization

sequence_length = 100
vocab_size = 15000
text_vectorization = TextVectorization(
    max_tokens=vocab_size,
    output_mode="int",
    output_sequence_length=sequence_length,
)
text_vectorization.adapt(dataset)


In [18]:
for data_batch in dataset.take(1):
    print(data_batch)

tf.Tensor(
[b"I was in college in Laramie, Wyoming, at the time of the Matthew Shepard tragedy, and will always be ashamed that such a crime could have taken place in my community. With that said, this movie gets it all wrong. They went for easy plot devices, the furthering of stereotypes (which shouldn't be Matthew's legacy), and a misrepresentation of the community. The sad part is that with a little work, they could have made a film that sent a lasting message. Instead they made a movie that portrayed a town full of simple minded rednecks who somehow have Southern accents, a police force consisting of a Barney Fife cop, and a total reinvention of certain parts of the story. Being that the idiots that murdered Matthew came from Laramie, examining the responsibility of the community is fair, but this isn't examination. This is lazy story telling that MTV unfortunately passed off as an educational film (They used to show it late at night without commercials as part of their Cable in th

In [19]:
def prepare_lm_dataset(text_batch):
    vectorized_sequences = text_vectorization(text_batch)
    x = vectorized_sequences[:, :-1]
    y = vectorized_sequences[:, 1:]
    return x, y

lm_dataset = dataset.map(prepare_lm_dataset, num_parallel_calls=4)

In [51]:
import tensorflow as tf
from tensorflow.keras import layers

class PositionalEmbedding(layers.Layer):
    def __init__(self, sequence_length, input_dim, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.token_embeddings = layers.Embedding(
            input_dim=input_dim, output_dim=output_dim)
        self.position_embeddings = layers.Embedding(
            input_dim=sequence_length, output_dim=output_dim)
        self.sequence_length = sequence_length
        self.input_dim = input_dim
        self.output_dim = output_dim

    def call(self, inputs):
        length = tf.shape(inputs)[-1]
        positions = tf.range(start=0, limit=length, delta=1)
        embedded_tokens = self.token_embeddings(inputs)
        embedded_positions = self.position_embeddings(positions)
        return embedded_tokens + embedded_positions

    def compute_mask(self, inputs, mask=None):
        return tf.math.not_equal(inputs, 0)

    def get_config(self):
        config = super(PositionalEmbedding, self).get_config()
        config.update({
            "output_dim": self.output_dim,
            "sequence_length": self.sequence_length,
            "input_dim": self.input_dim,
        })
        return config


class TransformerDecoder(layers.Layer):
    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention_1 = layers.MultiHeadAttention(
          num_heads=num_heads, key_dim=embed_dim)
        self.attention_2 = layers.MultiHeadAttention(
          num_heads=num_heads, key_dim=embed_dim)
        self.dense_proj = keras.Sequential(
            [layers.Dense(dense_dim, activation="relu"),
             layers.Dense(embed_dim),]
        )
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.layernorm_3 = layers.LayerNormalization()
        self.supports_masking = True

    def get_config(self):
        config = super(TransformerDecoder, self).get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "dense_dim": self.dense_dim,
        })
        return config

    # def get_causal_attention_mask(self, inputs):
    #     input_shape = tf.shape(inputs)
    #     batch_size, sequence_length = input_shape[0], input_shape[1]
    #     i = tf.range(sequence_length)[:, tf.newaxis]
    #     j = tf.range(sequence_length)
    #     mask = tf.cast(i >= j, dtype="int32")
    #     mask = tf.reshape(mask, (1, input_shape[1], input_shape[1]))
    #     mult = tf.concat(
    #         [tf.expand_dims(batch_size, -1),
    #          tf.constant([1, 1], dtype=tf.int32)], axis=0)
    #     return tf.tile(mask, mult)

    def call(self, inputs, encoder_outputs, mask=None):
        attention_output_1 = self.attention_1(
            query=inputs,
            value=inputs,
            key=inputs
        )
        attention_output_1 = self.layernorm_1(inputs + attention_output_1)
    
        attention_output_2 = self.attention_2(
            query=attention_output_1,
            value=encoder_outputs,
            key=encoder_outputs
            )
        attention_output_2 = self.layernorm_2(attention_output_1 + attention_output_2)
    
        proj_output = self.dense_proj(attention_output_2)
        return self.layernorm_3(attention_output_2 + proj_output)

In [53]:
from tensorflow.keras import layers
embed_dim = 256
latent_dim = 2048
num_heads = 2

inputs = keras.Input(shape=(None,), dtype="int64")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(inputs)
x = TransformerDecoder(embed_dim, latent_dim, num_heads)(x, x)
outputs = layers.Dense(vocab_size, activation="softmax")(x)
model = keras.Model(inputs, outputs)
model.compile(loss="sparse_categorical_crossentropy", optimizer="rmsprop")

In [54]:
import numpy as np

tokens_index = dict(enumerate(text_vectorization.get_vocabulary()))

def sample_next(predictions, temperature=1.0):
    predictions = np.asarray(predictions).astype("float64")
    predictions = np.log(predictions) / temperature
    exp_preds = np.exp(predictions)
    predictions = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, predictions, 1)
    return np.argmax(probas)

class TextGenerator(keras.callbacks.Callback):
    def __init__(self,
                 prompt,
                 generate_length,
                 model_input_length,
                 temperatures=(1.,),
                 print_freq=1):
        self.prompt = prompt
        self.generate_length = generate_length
        self.model_input_length = model_input_length
        self.temperatures = temperatures
        self.print_freq = print_freq

    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) % self.print_freq != 0:
            return
        for temperature in self.temperatures:
            print("== Generating with temperature", temperature)
            sentence = self.prompt
            for i in range(self.generate_length):
                tokenized_sentence = text_vectorization([sentence])
                predictions = self.model(tokenized_sentence)
                next_token = sample_next(predictions[0, i, :], temperature)
                sampled_token = tokens_index[next_token]
                sentence += " " + sampled_token
            print(sentence)

prompt = "This movie"
text_gen_callback = TextGenerator(
    prompt,
    generate_length=50,
    model_input_length=sequence_length,
    temperatures=(0.2, 0.5, 0.7, 1., 1.5))

In [55]:
# 코랩에서 정상 실행만 확인하기 위해 에포크 횟수를 200에서 10으로 줄입니다
model.fit(lm_dataset, epochs=10,  # 200
          callbacks=[text_gen_callback])

Epoch 1/10
== Generating with temperature 0.2
This movie movie                                                 
== Generating with temperature 0.5
This movie movie                                                 
== Generating with temperature 0.7
This movie movie                                                 
== Generating with temperature 1.0
This movie movie this                                                
== Generating with temperature 1.5
This movie movie 1010 agrees   neil  101 friends awards 1st anna film woody slavery snoop psychiatrist elijah doubles lee tales canal about eternity cent with drawer angela airplane chick following hated different mankind grass keystone decades chest believe fans years invited type dangerfield portraying women provide horror die cloth
Epoch 2/10
== Generating with temperature 0.2
This movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie mo

This movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie
== Generating with temperature 0.7
This movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie
== Generating with temperature 1.0
This movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie mov

This movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie movie


<keras.callbacks.History at 0x7f7663dc9850>

In [56]:

class PositionalEmbedding(layers.Layer):
    def __init__(self, sequence_length, input_dim, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.token_embeddings = layers.Embedding(input_dim=input_dim, output_dim=output_dim)
        self.position_embeddings = layers.Embedding(input_dim=sequence_length, output_dim=output_dim)
        self.sequence_length = sequence_length
        self.input_dim = input_dim
        self.output_dim = output_dim
        
    def call(self, inputs):
        length = tf.shape(inputs)[-1]
        positions = tf.range(start=0, limit=length, delta=1)
        embedded_tokens = self.token_embeddings(inputs)
        embedded_positions = self.position_embeddings(positions)
        return embedded_tokens + embedded_positions
    
    def compute_mask(self, inputs, mask=None):
        return tf.math.not_equal(inputs, 0)
    
    def get_config(self):
        config = super().get_config()
        config.update({
            "output_dim": self.output_dim,
            "sequence_length": self.sequence_length,
            "input_dim": self.input_dim,
        })
        return config
    
class TransformerDecoder(layers.Layer):
    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention_1 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim)
        self.attention_2 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim)
        self.dense_proj = keras.Sequential(
            [layers.Dense(dense_dim, activation="relu"),
             layers.Dense(embed_dim),]
        )
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.layernorm_3 = layers.LayerNormalization()
        self.supports_masking = True
    
    def get_config(self):
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "dense_dim": self.dense_dim,
        })
        return config
    
    def get_causal_attention_mask(self, inputs):
        input_shape = tf.shape(inputs)
        batch_size, sequence_length = input_shape[0], input_shape[1]
        i = tf.range(sequence_length)[:, tf.newaxis]
        j = tf.range(sequence_length)
        mask = tf.cast(i >= j, dtype="int32")
        mask = tf.reshape(mask, (1, input_shape[1], input_shape[1]))
        mult = tf.concat(
            [tf.expand_dims(batch_size, -1),
             tf.constant([1, 1], dtype=tf.int32)], axis=0)
        return tf.tile(mask, mult)
    
    def call(self, inputs, encoder_outputs, mask=None):
        causal_mask = self.get_causal_attention_mask(inputs)
        if mask is not None:
            padding_mask = tf.cast(
                mask[:, tf.newaxis, :], dtype="int32")
            padding_mask = tf.minimum(padding_mask, causal_mask)
        else:
            padding_mask = mask
        attention_output_1 = self.attention_1(
            query=inputs,
            value=inputs,
            key=inputs,
            attention_mask=causal_mask)
        attention_output_1 = self.layernorm_1(inputs + attention_output_1)
        attention_output_2 = self.attention_2(
            query=attention_output_1,
            value=encoder_outputs,
            key=encoder_outputs,
            attention_mask=padding_mask,
        )
        attention_output_2 = self.layernorm_2(
            attention_output_1 + attention_output_2)
        proj_output = self.dense_proj(attention_output_2)
        return self.layernorm_3(attention_output_2 + proj_output)
    

In [57]:
from tensorflow.keras import layers
embed_dim = 256
latent_dim = 2048
num_heads = 2

inputs = keras.Input(shape=(None,), dtype="int64")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(inputs)
x = TransformerDecoder(embed_dim, latent_dim, num_heads)(x, x)
outputs = layers.Dense(vocab_size, activation="softmax")(x)
model = keras.Model(inputs, outputs)
model.compile(loss="sparse_categorical_crossentropy", optimizer="rmsprop")

In [58]:
# 코랩에서 정상 실행만 확인하기 위해 에포크 횟수를 200에서 10으로 줄입니다
model.fit(lm_dataset, epochs=10,  # 200
          callbacks=[text_gen_callback])

Epoch 1/10
== Generating with temperature 0.2
This movie movie is is not a a movie [UNK] that and is it not is only a one film of is the not worst the movie worst ever movie made ever i made have i ever have seen ever it seen is it the is worst the movie worst ever movie
== Generating with temperature 0.5
This movie movie is is not the a best movie movie i ever have seen ever i seen like it the is acting not is bad the it acting is is not terrible a the movie script is is very pretty good good [UNK] and acting the is acting barely is just
== Generating with temperature 0.7
This movie was is a not fantastic really film bad i acting could was never terrible seen i it find was it too was [UNK] to the be worst a film great i movie have i seen have i to have watch seen it this was movie very i funny can but
== Generating with temperature 1.0
This movie documentary has is no discovered new by york american the life one that point hitchcock is in that who every showed public the [UNK] space d

== Generating with temperature 0.2
This movie movie was was so so bad bad i it was was so so bad i i was was [UNK] thinking by that the it way was the going acting to was be bad bad but and the the acting acting was was bad bad the the plot acting was was
== Generating with temperature 0.5
This movie movie is was so a incredibly kid funny who and is a a very little funny too movie and to just watch be and entertained i by was the bored end [UNK] i  was laughing out loud at the end you and then i was watching it to the
== Generating with temperature 0.7
This movie movie was is amazing one the of acting the was best great i movie have i seen have in to a say movie not this least one made fits me the like plot most holes of in all the it movie is is so very much good older i than
== Generating with temperature 1.0
This movie film is didnt in work the but lives really the loved mood [UNK] it it great captures character the its vietnam just war maybe all mankind americans and may how be lea

<keras.callbacks.History at 0x7f7663dc99d0>