# Install and update all the necessary Python packages.

In [None]:
%%capture
%pip install -U transformers
%pip install -U datasets
%pip install -U accelerate
%pip install -U peft
%pip install -U trl
%pip install -U bitsandbytes
%pip install -U wandb


# Load the Python packages and functions we will use throughout the fine-tuning and evaluation process.

In [None]:
import os
import subprocess
import sys

import torch
import wandb
from datasets import load_dataset
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from trl import SFTTrainer, setup_chat_format

# Hugging Face access token

In [None]:
# Access tokens must never be stored in the notebook: anyone who can read the
# file (or its history) can use them. They are read from the environment
# instead and are None when the corresponding variable is not set.
# SECURITY: the tokens that used to be written here should be revoked.
read_token = os.environ.get("HF_READ_TOKEN")
write_token = os.environ.get("HF_WRITE_TOKEN")

# Install huggingface_hub

In [None]:
!pip install huggingface_hub



# Log in to Hugging Face CLI using the API key.

In [None]:
from huggingface_hub import login
from getpass import getpass

# Use HF_TOKEN from the environment when it is set; otherwise ask for the
# token interactively. The argument of getpass() is only the prompt label and
# is echoed to the cell output, so it must never contain the token itself.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")

# Log in to Hugging Face using the provided token
login(token=hf_token)



[REDACTED: access token was echoed here as the prompt text]··········
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


# Login to Weights & Biases using the API key and instantiate the new project.

In [None]:
# The API key is taken from the WANDB_API_KEY environment variable instead of
# being written into the notebook. When the variable is unset, key=None is
# passed and wandb falls back to its own credential lookup.
# SECURITY: the key that used to be hardcoded here should be revoked.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

# Start the run that the trainer reports to (report_to="wandb").
run = wandb.init(
    project='Fine-tune Llama 3.2 3B on Finance Dataset',
    job_type="training",
    anonymous="allow"
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mzafor4558[0m ([33mzafor4558-east-west-university[0m). Use [1m`wandb login --relogin`[0m to force relogin


# Set the variables for the base model, dataset, and new model name. We will be using them in multiple places in this project, so it is better to set them at the start to avoid confusion.

In [None]:
# Identifier of the pre-trained checkpoint passed to from_pretrained() below.
base_model = "BanglaLLM/BanglaLLama-3.2-3b-bangla-alpaca-orca-instruct-v0.0.1"
# Name used for the training output directory and for saving/pushing the result.
new_model = "BanglaLLama-3.2-3b-banglafingpt_finetuned-instruct-v0.0.1"
# Path of the Excel sheet that is read with pandas to build the dataset.
dataset_name = "/content/Bangla_FinGpt.xlsx"

#  Loading the model and tokenizer
Set the data type and attention implementation.

In [None]:
# Set torch dtype and attention implementation.
# CUDA availability is checked first so that a runtime without a GPU takes the
# fallback branch instead of failing inside get_device_capability().
has_cc8_gpu = (
    torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
)

if has_cc8_gpu:
    # flash-attn is only installed on GPUs with compute capability >= 8.
    # check=False mirrors the shell escape used before: a failed install does
    # not abort this cell.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-qqq", "flash-attn"],
        check=False,
    )
    torch_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
else:
    torch_dtype = torch.float16
    attn_implementation = "eager"


# Load the model and tokenizer by providing the Hugging Face model ID. Even though our model is small, loading the full model and fine-tuning it will take some time. Instead, we will load the model in 4-bit quantization.

In [None]:
# QLoRA config: 4-bit NF4 weights with double quantization; the compute dtype
# follows the dtype chosen for this GPU in the previous cell.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

# Load the quantized model; device_map="auto" lets the library place the weights.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

# Load tokenizer. trust_remote_code is deliberately left at its default: it
# would allow code shipped in the model repository to run locally, and this
# tokenizer also loads without it (see the dataset cell further down).
tokenizer = AutoTokenizer.from_pretrained(base_model)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/917 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/21.0k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.25G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/180 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/449 [00:00<?, ?B/s]

# Loading and processing the dataset

In [None]:
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer

# The tokenizer created together with the model is reused as-is. Loading a
# fresh one here would throw away anything applied to it later (for example by
# setup_chat_format) whenever this cell is re-run, which then surfaces as
# "tokenizer.chat_template is not set" at inference time.

# Load the Excel file using pandas; the path is the `dataset_name` defined in
# the configuration cell, so there is a single place to change it.
df = pd.read_excel(dataset_name)

# Convert the pandas DataFrame to a Hugging Face Dataset
dataset = Dataset.from_pandas(df)

# Shuffle and keep at most 1000 rows (fewer when the sheet is smaller, instead
# of failing on an out-of-range index).
subset_size = min(1000, len(dataset))
dataset = dataset.shuffle(seed=65).select(range(subset_size))

# System instruction inserted at the top of every formatted training example
instruction = """You are a top-rated Finance AI assistant"""

# Chat formatting helper applied to every dataset row
def format_chat_template(row):
    """Attach a plain-text chat transcript to ``row`` under the key ``"text"``.

    The transcript has three turns separated by blank lines: the module-level
    ``instruction`` as the system turn, ``row['Question']`` as the user turn
    and ``row['Answer']`` as the assistant turn. No tokenization happens here.

    Args:
        row: mapping with at least the keys ``'Question'`` and ``'Answer'``.

    Returns:
        The same ``row`` object, modified in place.
    """
    turns = (
        f"System: {instruction}",
        f"User: {row['Question']}",
        f"Assistant: {row['Answer']}",
    )
    # Every turn, including the last one, is followed by an empty line.
    row["text"] = "\n\n".join(turns) + "\n\n"
    return row

# Add the "text" column to every row, using four worker processes.
dataset = dataset.map(format_chat_template, num_proc=4)

# Print one formatted example as a sanity check of the layout.
print(dataset[4]["text"])


Map (num_proc=4):   0%|          | 0/1000 [00:00<?, ? examples/s]

System: You are a top-rated Finance AI assistant

User: কোন অবস্থায় ক্রেডিট নোট এবং ডেবিট নোট জারি করতে হয়?

Assistant: "ক্রেডিট নোট" অথবা "ডেবিট নোট" জারি করতে হবে বিধি ২৭ এর বিধান সাপেক্ষে, যদি ইস্যুকৃত এক বা একাধিক কর চালানপত্রে সংশোধনী সংশ্লিষ্ট হয় এবং নিবন্ধিত ব্যক্তি হ্রাসকারী সমন্বয় সাধন করতে চান।




In [None]:
# Preview the first rows of the raw sheet to check the available columns.
df.head()


Unnamed: 0,QuestionID,Type,Topic,Context,Question,Answer
0,1,Bangla,Customs,১। সংক্ষিপ্ত শিরোনাম ও প্রবর্তন। (১) এই আইন কা...,কাস্টমস কি?,কাস্টমস হলো এমন একটি স্থান বা সংস্থা যেখানে বন...
1,2,English,Customs,1. Short title and introduction. (1) This Act ...,What is customs?,Customs is a place or organization where autho...
2,3,Bangla,Customs,১। সংক্ষিপ্ত শিরোনাম ও প্রবর্তন। (১) এই আইন কা...,কোন পদে কাস্টমস কর্মকর্তা নিয়োগ প্রদান হতে পারে?,কাস্টমস কর্মকর্তা নিয়োগ প্রদান হতে পারে কমিশনা...
3,4,English,Customs,1. Short title and introduction. (1) This Act ...,What positions can be given to the appointment...,The appointment of Customs officers may be Com...
4,5,Bangla,Customs,১। সংক্ষিপ্ত শিরোনাম ও প্রবর্তন। (১) এই আইন কা...,দলবিধি কী?,"দলবিধি মূলত একটি আইন, এখানে ইংরেজি হিসেবে ""Pen..."


In [None]:
# Show the raw formatted string (escape sequences visible) for one example.
dataset[3]["text"]

'System: You are a top-rated Finance AI assistant\n\nUser: ধারা 163 (5) অনুযায়ী ব্যবসায়িক আয়ের উপর ন্যূনতম কর কত?\n\nAssistant: ধারা 163 (5) অনুযায়ী ব্যবসায়িক আয়ের উপর সর্বনিম্ন কর প্রদত্ত সূত্রের ভিত্তিতে গণনা করা হয়।\n\n'

# Setting up the model
Extract the names of the linear modules from the model.

In [None]:
import bitsandbytes as bnb

def find_all_linear_names(model):
    """Collect the leaf names of all 4-bit linear layers in ``model``.

    Every module yielded by ``model.named_modules()`` that is an instance of
    ``bnb.nn.Linear4bit`` contributes the last dot-separated component of its
    qualified name. ``'lm_head'`` is removed from the result when present.

    Args:
        model: object exposing ``named_modules()`` yielding ``(name, module)``.

    Returns:
        list: the unique leaf names, in no guaranteed order.
    """
    linear_4bit = bnb.nn.Linear4bit
    leaf_names = {
        qualified_name.rsplit('.', 1)[-1]
        for qualified_name, layer in model.named_modules()
        if isinstance(layer, linear_4bit)
    }
    # The output head is excluded (the original note says: needed for 16 bit).
    leaf_names.discard('lm_head')
    return list(leaf_names)

# Names of the quantized linear layers; passed to LoraConfig as target_modules below.
modules = find_all_linear_names(model)

# Use the linear module names to create the LoRA adapter. We will only fine-tune the LoRA adapter and leave the rest of the model frozen to save memory and speed up training.

In [None]:
# Prepare the model/tokenizer pair for chat-style input before adapters are
# attached (the recorded output shows that new embeddings get initialized here).
model, tokenizer = setup_chat_format(model, tokenizer)

# LoRA config: rank-16 adapters on the linear layers found above, with
# alpha 32, dropout 0.05 and no bias terms.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=modules,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the model with the adapters described by peft_config.
model = get_peft_model(model, peft_config)

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


# We are configuring the model hyperparameters to run it

In [None]:
# Hyperparameters for the fine-tuning run
training_arguments = TrainingArguments(
    # Where outputs are written
    output_dir=new_model,
    # Schedule: a single epoch; max_steps=-1 means no step-based limit
    num_train_epochs=1,
    max_steps=-1,
    warmup_steps=10,
    learning_rate=2e-4,
    # Batching: one sample per device, gradients accumulated over two steps
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    group_by_length=True,
    # Optimizer and precision flags
    optim="paged_adamw_32bit",
    fp16=False,
    bf16=False,
    # Evaluate at the end of each epoch; log every step to Weights & Biases
    eval_strategy="epoch",
    logging_strategy="steps",
    logging_steps=1,
    report_to="wandb",
)


# We will now set up a supervised fine-tuning (SFT) trainer and provide a train and evaluation dataset, LoRA configuration, training argument, tokenizer, and model.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows of the full DataFrame for evaluation (fixed seed).
train_df, test_df = train_test_split(df, test_size=0.1, random_state=42)

# Turn each split into a Hugging Face Dataset and add the formatted "text" column.
train_dataset, test_dataset = (
    Dataset.from_pandas(frame).map(format_chat_template, num_proc=4)
    for frame in (train_df, test_df)
)

# Supervised fine-tuning trainer over the two splits.
# NOTE(review): the recorded output warns that some of these arguments are
# deprecated in favour of SFTConfig. Confirm against the installed trl version.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    peft_config=peft_config,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    dataset_text_field="text",
    max_seq_length=512,
    packing=False,
)


Map (num_proc=4):   0%|          | 0/9370 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/1042 [00:00<?, ? examples/s]


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/9370 [00:00<?, ? examples/s]

Map:   0%|          | 0/1042 [00:00<?, ? examples/s]

# Model training
Start the training process and monitor the training and validation loss metrics.

In [None]:
# Run fine-tuning; the returned TrainOutput is shown as the cell result.
trainer.train()

Epoch,Training Loss,Validation Loss
1,2.5204,0.97734


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


TrainOutput(global_step=4685, training_loss=1.0484937710435056, metrics={'train_runtime': 4992.3364, 'train_samples_per_second': 1.877, 'train_steps_per_second': 0.938, 'total_flos': 2.779379519106048e+16, 'train_loss': 1.0484937710435056, 'epoch': 1.0})

In [None]:
# Close the Weights & Biases run opened with wandb.init(); the run summary is printed below.
wandb.finish()

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇████
train/grad_norm,▃▅▂▅▆▃▃▅▁▅▅▁▁▅▇▂▂▅▂▄▂▆▆▂▅▅▂▇▂▁▄▄▂▇▆▂▆▆▆█
train/learning_rate,█████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▁
train/loss,▂█▂▆▆▂▂▁▆█▂▂▅▇▄▅▂▅▂▅▂▄▇▁▂▆▆▁▂▅▂▆▁▁▆▁▁▂▅▁

0,1
eval/loss,0.97734
eval/runtime,213.0984
eval/samples_per_second,4.89
eval/steps_per_second,4.89
total_flos,2.779379519106048e+16
train/epoch,1.0
train/global_step,4685.0
train/grad_norm,3.34684
train/learning_rate,0.0
train/loss,2.5204


In [None]:
messages = [{"role": "system", "content": "Please provide a detailed explanation about electronic record storage."},
            {"role": "user", "content": "কীভাবে ইলেকট্রনিক রেকর্ড সংরক্ষণ করা হয়?"}]

# Build the prompt in the same plain-text layout that format_chat_template
# produced for training ("System: ... User: ... Assistant: ..."). The model was
# never fine-tuned on the tokenizer's chat template, and apply_chat_template
# fails outright when no template is set on the tokenizer.
system_text, user_text = (message["content"] for message in messages)
prompt = f"System: {system_text}\n\nUser: {user_text}\n\nAssistant:"

# Put the inputs on whatever device the model lives on instead of assuming "cuda".
inputs = tokenizer(prompt, return_tensors='pt').to(model.device)

outputs = model.generate(**inputs, max_new_tokens=1000, num_return_sequences=1)

# Decode only the newly generated tokens. Splitting the full text on the word
# "assistant" breaks when that word is missing or appears in the answer.
prompt_length = inputs["input_ids"].shape[-1]
text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

print(text.strip())

ValueError: Cannot use chat template functions because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating

In [None]:
# Mount Google Drive under /content/drive (only works inside Google Colab).
# NOTE(review): no visible cell below writes to /content/drive — confirm whether
# the archive is meant to be copied there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd /content/BanglaLLama-3.2-3b-banglafingpt_finetuned-instruct-v0.0.1

/content/BanglaLLama-3.2-3b-banglafingpt_finetuned-instruct-v0.0.1


In [None]:
!zip -r /content/BanglaLLama-3.2-3b-banglafingpt_finetuned-instruct-v0.0.1.zip /content/BanglaLLama-3.2-3b-banglafingpt_finetuned-instruct-v0.0.1


  adding: content/BanglaLLama-3.2-3b-banglafingpt_finetuned-instruct-v0.0.1/ (stored 0%)
  adding: content/BanglaLLama-3.2-3b-banglafingpt_finetuned-instruct-v0.0.1/checkpoint-2500/ (stored 0%)
  adding: content/BanglaLLama-3.2-3b-banglafingpt_finetuned-instruct-v0.0.1/checkpoint-2500/trainer_state.json (deflated 83%)
  adding: content/BanglaLLama-3.2-3b-banglafingpt_finetuned-instruct-v0.0.1/checkpoint-2500/optimizer.pt (deflated 9%)
  adding: content/BanglaLLama-3.2-3b-banglafingpt_finetuned-instruct-v0.0.1/checkpoint-2500/tokenizer.json (deflated 85%)
  adding: content/BanglaLLama-3.2-3b-banglafingpt_finetuned-instruct-v0.0.1/checkpoint-2500/training_args.bin (deflated 51%)
  adding: content/BanglaLLama-3.2-3b-banglafingpt_finetuned-instruct-v0.0.1/checkpoint-2500/special_tokens_map.json (deflated 66%)
  adding: content/BanglaLLama-3.2-3b-banglafingpt_finetuned-instruct-v0.0.1/checkpoint-2500/tokenizer_config.json (deflated 96%)
  adding: content/BanglaLLama-3.2-3b-banglafingpt_fine

In [None]:
# Save the fine-tuned model
# trainer.model.save_pretrained(new_model)
# trainer.model.push_to_hub(new_model, use_temp_dir=False)

In [None]:
# Merge the LoRA adapter into the underlying model and keep the merged result
# under the same name.
#
# An earlier, now disabled, variant of this cell reloaded the base model and
# attached the saved adapter before merging:
#     base_model_reload, tokenizer = setup_chat_format(base_model_reload, tokenizer)
#     model = PeftModel.from_pretrained(base_model_reload, new_model)
# `base_model_reload` is not defined anywhere in this notebook, so that path
# cannot be re-enabled without first restoring the reload step.
#
# NOTE(review): `model` here is the adapter-wrapped model that was loaded with
# the 4-bit quantization config above. Confirm that merging into quantized
# weights is intended.
# NOTE(review): this cell rebinds `model`, so running it a second time operates
# on the already merged model.

model = model.merge_and_unload()






In [None]:
# Kept in its own variable so the global `instruction` that
# format_chat_template reads for the training data is not overwritten.
system_prompt = """You are a top-rated customer service agent named John.
    Be polite to customers and answer all their questions.
    """

messages = [{"role": "system", "content": system_prompt},
    {"role": "user", "content": "I have to see what payment payment modalities are accepted"}]

# Build the prompt in the same plain-text layout that format_chat_template
# produced for training. apply_chat_template fails when the tokenizer has no
# chat template, and the model was not fine-tuned on one.
system_text, user_text = (message["content"] for message in messages)
prompt = f"System: {system_text}\n\nUser: {user_text}\n\nAssistant:"

# Put the inputs on whatever device the model lives on instead of assuming "cuda".
inputs = tokenizer(prompt, return_tensors='pt').to(model.device)

outputs = model.generate(**inputs, max_new_tokens=150, num_return_sequences=1)

# Decode only the newly generated tokens rather than splitting on "assistant".
prompt_length = inputs["input_ids"].shape[-1]
text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

print(text.strip())

ValueError: Cannot use chat template functions because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating

In [None]:
# Save the current model and the tokenizer under `new_model`.
# NOTE(review): `new_model` is a relative path, so the target depends on the
# working directory; an earlier cell runs %cd into the training output folder.
model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)

In [None]:
# Upload the model and the tokenizer to the Hugging Face Hub under the name
# `new_model`; requires the login performed earlier (the recorded login output
# reports write permission).
# NOTE(review): use_temp_dir=False presumably makes the upload go through the
# local `new_model` folder instead of a temporary one. Confirm against the docs.
model.push_to_hub(new_model, use_temp_dir=False)
tokenizer.push_to_hub(new_model, use_temp_dir=False)