In [None]:
! pip install wyn-transformers

In [2]:
import wyn_transformers
from wyn_transformers.transformers import *

# Hyperparameters
num_layers = 2
d_model = 64
dff = 128
num_heads = 4
input_vocab_size = 8500
maximum_position_encoding = 10000

# Instantiate the Transformer model
transformer = TransformerModel(num_layers, d_model, num_heads, dff, input_vocab_size, maximum_position_encoding)

# Compile the model
transformer.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Generate random sample data
sample_data = np.random.randint(0, input_vocab_size, size=(64, 38))

# Fit the model on the random sample data
transformer.fit(sample_data, sample_data, epochs=5)

Epoch 1/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 499ms/step - accuracy: 0.0000e+00 - loss: 16.0640
Epoch 2/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 490ms/step - accuracy: 0.0000e+00 - loss: 15.4381
Epoch 3/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 479ms/step - accuracy: 5.4825e-04 - loss: 14.7394
Epoch 4/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 497ms/step - accuracy: 0.0027 - loss: 14.1677
Epoch 5/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 605ms/step - accuracy: 0.0019 - loss: 13.9759


<keras.src.callbacks.history.History at 0x7ed9f79c6f50>

In [3]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Create a sample pandas DataFrame
data = {
    # input: X
    'question': [
        'What is the capital of France?',
        'How many continents are there?',
        'What is the largest mammal?',
        'Who wrote the play Hamlet?'
    ],
    # output: Y
    'answer': [
        'The capital of France is Paris.',
        'There are seven continents.',
        'The blue whale is the largest mammal.',
        'William Shakespeare wrote Hamlet.'
    ]
}

# Or read it from a directory
# data = pd.DataFrame("test.csv")

df = pd.DataFrame(data)
df

Unnamed: 0,question,answer
0,What is the capital of France?,The capital of France is Paris.
1,How many continents are there?,There are seven continents.
2,What is the largest mammal?,The blue whale is the largest mammal.
3,Who wrote the play Hamlet?,William Shakespeare wrote Hamlet.


In [68]:
# Initialize the Tokenizer
tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")

# Fit the tokenizer on the questions and answers
tokenizer.fit_on_texts(df['question'].tolist() + df['answer'].tolist())

# Convert texts to sequences
question_sequences = tokenizer.texts_to_sequences(df['question'].tolist())
answer_sequences = tokenizer.texts_to_sequences(df['answer'].tolist())

# Pad sequences to ensure consistent input size for the model
max_length = 7  # Example fixed length; this can be adjusted as needed
question_padded = pad_sequences(question_sequences, maxlen=max_length, padding='post')
answer_padded = pad_sequences(answer_sequences, maxlen=max_length, padding='post')

# Combine questions and answers for training
sample_data = np.concatenate((question_padded, answer_padded), axis=0)

# Display the prepared sample data
print("Sample data (tokenized and padded):\n", sample_data)

Sample data (tokenized and padded):
 [[ 4  3  2  5  6  7  0]
 [15 16  8  9 10  0  0]
 [ 4  3  2 11 12  0  0]
 [17 13  2 18 14  0  0]
 [ 2  5  6  7  3 19  0]
 [10  9 20  8  0  0  0]
 [ 2 21 22  3  2 11 12]
 [23 24 13 14  0  0  0]]


In [69]:
import tensorflow as tf
from wyn_transformers.inference import *

# Testing the function to convert back to text
print("Original token:")
print(question_padded)
print("\nConverted back to text (questions):")
print(sequences_to_text(question_padded, tokenizer))

print("Original token:")
print(answer_padded)
print("\nConverted back to text (answers):")
print(sequences_to_text(answer_padded, tokenizer))

Original token:
[[ 4  3  2  5  6  7  0]
 [15 16  8  9 10  0  0]
 [ 4  3  2 11 12  0  0]
 [17 13  2 18 14  0  0]]

Converted back to text (questions):
['what is the capital of france', 'how many continents are there', 'what is the largest mammal', 'who wrote the play hamlet']
Original token:
[[ 2  5  6  7  3 19  0]
 [10  9 20  8  0  0  0]
 [ 2 21 22  3  2 11 12]
 [23 24 13 14  0  0  0]]

Converted back to text (answers):
['the capital of france is paris', 'there are seven continents', 'the blue whale is the largest mammal', 'william shakespeare wrote hamlet']


In [72]:
# Hyperparameters
num_layers = 2
d_model = 64
dff = 128
num_heads = 2
input_vocab_size = 400
maximum_position_encoding = 10000

# Instantiate the Transformer model
transformer = TransformerModel(num_layers, d_model, num_heads, dff, input_vocab_size, maximum_position_encoding)

# Compile the model
transformer.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Fit the model on the custom sample data
transformer.fit(sample_data, sample_data, epochs=400)

Epoch 1/400
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 9s/step - accuracy: 0.0000e+00 - loss: 12.0465
Epoch 2/400
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - accuracy: 0.0000e+00 - loss: 9.5926
Epoch 3/400
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 0.1250 - loss: 9.0890
Epoch 4/400
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 0.1786 - loss: 8.1515
Epoch 5/400
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 0.2321 - loss: 7.7422
Epoch 6/400
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - accuracy: 0.2679 - loss: 7.4757
Epoch 7/400
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.2679 - loss: 6.7560
Epoch 8/400
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - accuracy: 0.3036 - loss: 6.6273
Epoch 9/400
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7ed9cc28ab00>

In [74]:
# Test the function with the example input
input_text = "Who wrote Hamlet?"
predicted_response = predict_text(input_text, transformer, tokenizer, max_length=20)
print("Predicted Response:", predicted_response)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Predicted Response: shakespeare wrote hamlet


In [76]:
from wyn_transformers.push_to_hub import *

In [77]:
from google.colab import userdata

In [78]:
# Example usage:
huggingface_token = userdata.get('HF_TOKEN')
account_name = "eagle0504"
model_name = "pretrained_transformer_model_v1"

# Call the function to push the model
# result = push_model_to_huggingface(huggingface_token, account_name, transformer, model_name)
result = push_model_to_huggingface(huggingface_token, account_name, transformer, model_name, tokenizer)
print(result)

tmp_model.keras:   0%|          | 0.00/1.53M [00:00<?, ?B/s]

vocab.pkl:   0%|          | 0.00/258 [00:00<?, ?B/s]

- empty or missing yaml metadata in repo card


Model, config, tokenizer, and model card pushed successfully to Hugging Face Hub with YAML metadata. 
Please go to this URL: https://huggingface.co/eagle0504/pretrained_transformer_model_v1


In [79]:
from huggingface_hub import hf_hub_download
import tensorflow as tf
import os
import json
import pickle

# Define the Hugging Face model repository path
model_repo_url = f"{account_name}/{model_name}"

# Step 1: Download the model file from Hugging Face
model_filename = f"{model_name}.keras"
model_file_path = hf_hub_download(repo_id=model_repo_url, filename=model_filename, use_auth_token=huggingface_token)

# Step 2: Load the pre-trained model from the downloaded file
pre_trained_transformer = tf.keras.models.load_model(model_file_path, custom_objects={"TransformerModel": TransformerModel})

# Step 3: Compile the model to prepare for further training
pre_trained_transformer.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Step 4: Reload the tokenizer (if used) by downloading tokenizer files from Hugging Face
tokenizer_config_path = hf_hub_download(repo_id=model_repo_url, filename="tokenizer_config.json", use_auth_token=huggingface_token)
vocab_path = hf_hub_download(repo_id=model_repo_url, filename="vocab.pkl", use_auth_token=huggingface_token)

# Load the tokenizer configuration from the downloaded file
with open(tokenizer_config_path, "r") as f:
    tokenizer_config = json.load(f)

# Recreate the tokenizer using TensorFlow's Tokenizer class
from tensorflow.keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer(
    num_words=tokenizer_config.get("num_words"),
    filters=tokenizer_config.get("filters"),
    lower=tokenizer_config.get("lower"),
    split=tokenizer_config.get("split"),
    char_level=tokenizer_config.get("char_level")
)
tokenizer.word_index = tokenizer_config.get("word_index")
tokenizer.index_word = tokenizer_config.get("index_word")

# Load the vocabulary from the pickle file
with open(vocab_path, "rb") as f:
    tokenizer.word_index = pickle.load(f)

# Clean up downloaded files
os.remove(tokenizer_config_path)
os.remove(vocab_path)

pretrained_transformer_model_v1.keras:   0%|          | 0.00/1.53M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/812 [00:00<?, ?B/s]

vocab.pkl:   0%|          | 0.00/258 [00:00<?, ?B/s]

In [80]:
# Prepare your sample_data and call fit again
pre_trained_transformer.fit(sample_data, sample_data, epochs=5)

Epoch 1/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8s/step - accuracy: 0.8214 - loss: 1.8700
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.7857 - loss: 1.8781
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 0.8571 - loss: 1.4830
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 0.8750 - loss: 1.2145
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - accuracy: 0.8571 - loss: 1.1532


<keras.src.callbacks.history.History at 0x7ed9ca6cfee0>

In [89]:
# Test the function with the example input
input_text = "what's capital of france?"
predicted_response = predict_text(input_text, pre_trained_transformer, tokenizer, max_length=20)
print("Predicted Response:", predicted_response)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
Predicted Response: capital hamlet france
