In [None]:
!pip install -q keras-nlp

In [None]:
import keras_nlp
import tensorflow as tf
from tensorflow import keras
import time

In [None]:
# Load pretrained GPT-2 ("gpt2_base_en") and its matching preprocessor.
# Sequences are capped at 128 tokens rather than the full 1024 so that
# training and generation run faster.
gpt2_preset = "gpt2_base_en"
preprocessor = keras_nlp.models.GPT2CausalLMPreprocessor.from_preset(
    gpt2_preset, sequence_length=128
)
gpt2_lm = keras_nlp.models.GPT2CausalLM.from_preset(
    gpt2_preset,
    preprocessor=preprocessor,
)



"Deer is able to "

In [None]:
# Time one generation from gpt2_lm for the prompt below.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() is wall-clock time and can jump if the
# system clock is adjusted.
start = time.perf_counter()

output = gpt2_lm.generate("Deer is able to", max_length=200)
print("\nGPT-2 output:")
print(output)

end = time.perf_counter()
print(f"TOTAL TIME ELAPSED: {end - start:.2f}s")




GPT-2 output:
Deer is able to move around with his feet. It is said that it is a form of magic that allows the Deer to move through the air and through objects.

In this form, Deer can move through a wide variety of environments, including a large forest, a small city, and even a small city with a few hundred residents. He can also use his feet to move through objects.

Contents show]

Appearance Edit

Deer is very tall and is very muscular. Deer wears a black shirt and blue jeans, with a dark red tie. He has a black beard and a black mustache that covers the left side of his face, and his hands have two black spikes protruding from the back of their fingers. He has short, dark hair with a black band on each end. He has a large, dark brown mustache and dark brown eyes.

In the anime, he has a dark blue hairline.

Personality
TOTAL TIME ELAPSED: 27.61s


Build the purpose-made wiki dataset

In [None]:
# Mount Google Drive inside the Colab runtime.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)
# Share link kept from the original cell (what it points to is not stated):
# https://drive.google.com/file/d/12700bE-pomYKoVQ214VrpBoJ7akXcTpL/view?usp=share_link

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Folder on the mounted Drive from which all .txt files are read.
# Plain string literal: the original f-string had no placeholders.
folder_path = "/content/drive/MyDrive/data/wiki_data"

In [None]:
import os

# Read every .txt file in folder_path into memory, one string per file.
# Filenames are sorted because os.listdir() returns entries in arbitrary
# order, which would otherwise let the dataset order vary between runs.
wiki_collection = []
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith('.txt'):  # skip anything that is not a text file
        continue
    file_path = os.path.join(folder_path, filename)
    # Explicit encoding, since open()'s default depends on the platform locale.
    # NOTE(review): assumes the files are UTF-8 -- confirm against the data.
    with open(file_path, 'r', encoding='utf-8') as file:
        wiki_collection.append(file.read())


In [None]:
# Preview rather than dump every document in full: show the first 300
# characters of up to three documents.
[doc[:300] for doc in wiki_collection[:3]]

['A deer  or true deer is a hoofed ruminant mammal of the family Cervidae. The two main groups of deer are the Cervinae, including muntjac, elk (wapiti), red deer, and fallow deer; and the Capreolinae, including reindeer (caribou), white-tailed deer, roe deer, and moose. Male deer of all species (except the water deer), as well as female reindeer, grow and shed new antlers each year. In this, they differ from permanently horned antelope, which are part of a different family (Bovidae) within the same order of even-toed ungulates (Artiodactyla).\n\nThe musk deer (Moschidae) of Asia and chevrotains (Tragulidae) of tropical African and Asian forests are separate families that are also in the ruminant clade Ruminantia; they are not especially closely related to Cervidae.\n\nDeer appear in art from Paleolithic cave paintings onwards, and they have played a role in mythology, religion, and literature throughout history, as well as in heraldry, such as red deer that appear in the coat of arms 

In [None]:
# Build the training pipeline step by step: one string per element,
# grouped into batches of 16, then cached and prefetched.
wiki_texts = tf.data.Dataset.from_tensor_slices(wiki_collection)
wiki_batches = wiki_texts.batch(16)
train_ds = wiki_batches.cache().prefetch(tf.data.AUTOTUNE)

In [None]:
# Fine-tune for `num_epochs` passes over all of `train_ds`.
# For a quicker demo run, shrink the dataset first, e.g.
# `train_ds = train_ds.take(500)`.
num_epochs = 5

# Decay the learning rate from 5e-4 to 0 over every training step
# (batches per epoch times the number of epochs).
total_steps = train_ds.cardinality() * num_epochs
learning_rate = keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=5e-4,
    decay_steps=total_steps,
    end_learning_rate=0.0,
)
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
gpt2_lm.compile(optimizer=optimizer, loss=loss, weighted_metrics=["accuracy"])

gpt2_lm.fit(train_ds, epochs=num_epochs)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f3f79e9f0d0>

In [None]:
# Time one generation from gpt2_lm for the prompt below.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() is wall-clock time and can jump if the
# system clock is adjusted.
start = time.perf_counter()

output = gpt2_lm.generate("Deer is able to", max_length=100)
print("\nGPT-2 output:")
print(output)

end = time.perf_counter()
print(f"TOTAL TIME ELAPSED: {end - start:.2f}s")


GPT-2 output:
Deer is able to distinguish the deer from the deer (Deerus reus), but is more commonly referred to as a "deer antelope" because of their long, white, pointed horns. Deer deer are the largest antelope species, and they live on the antelope's back. Deer antelope are commonly kept as part of the family, but deer antelope can be further distinguished from other antelope by their antelope antelope horn, which is a horn of
TOTAL TIME ELAPSED: 18.38s


In [None]:
model_name = "test.h5"
model_path = "/content/drive/MyDrive/data/" + model_name
#tf.saved_model.save(gpt2_lm, model_path)
#tf.keras.saving.save_model(gpt2_lm, model_path)

gpt2_lm.save_weights("model_path")

Create model and load weights

In [None]:
from google.colab import drive
drive.mount('/content/drive')
model_name = "GPT2_keras_deer"
model_path = "/content/drive/MyDrive/data/" + model_name

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#model = tf.saved_model.load(model_path)
#type(model)

preprocessor = keras_nlp.models.GPT2CausalLMPreprocessor.from_preset(
    "gpt2_base_en",
    sequence_length=128,
)
model = keras_nlp.models.GPT2CausalLM.from_preset(
    "gpt2_base_en", preprocessor=preprocessor
)

model.load_weights('model_path')




<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f3f77f2a0b0>

In [None]:
# Time one generation from `model` for the prompt below.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() is wall-clock time and can jump if the
# system clock is adjusted.
start = time.perf_counter()

output = model.generate("Deer is able to", max_length=100)
print("\nGPT-2 output:")
print(output)

end = time.perf_counter()
print(f"TOTAL TIME ELAPSED: {end - start:.2f}s")


GPT-2 output:
Deer is able to be distinguished from deer, or from the other deer species. Deer are distinguished from their wild cousins by their large size. Deer are commonly referred to as deer, as their antlers grow and their tail is longer. Deer are commonly referred to as deer-eagle antelope, or deer antelope, or deer antelope antelope antelope, depending on the species and the context, but deer antelope are commonly distinguished by their antlers, which are similar in
TOTAL TIME ELAPSED: 17.55s


In [None]:
# Save the whole `model` (not just its weights) to Drive under a .h5 filename.
full_model_path = "/content/drive/MyDrive/data/total.h5"
model.save(full_model_path)