<a href="https://colab.research.google.com/github/vaibhavgitt/Fine_Tune_falcon-7b/blob/main/Fine_Tune_falcon_7b.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Fine-tuning the Falcon-7B LLM for question answering (Q&A)**

In [14]:
# Print the NVIDIA GPU status for this runtime.
!nvidia-smi

In [6]:
# Upgrade pip itself, then install the libraries this notebook imports.
!pip install -Uqqq pip --progress-bar off
!pip install -qqq bitsandbytes==0.39.0 --progress-bar off
!pip install -qqq torch==2.0.1 --progress-bar off
# NOTE(review): transformers, peft and accelerate are installed without a version
# pin, so a fresh run may pick up releases that do not match the pinned
# torch/bitsandbytes versions — consider pinning them.
!pip install -qqq transformers --progress-bar off
!pip install -qqq peft --progress-bar off
!pip install -qqq accelerate --progress-bar off
!pip install -qqq datasets==2.12.0 --progress-bar off
!pip install -qqq loralib==0.1.1 --progress-bar off
!pip install -qqq einops==0.6.1 --progress-bar off


In [14]:
import json
import os
from pprint import pprint

import bitsandbytes as bnb
import pandas as pd
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
# Expose only GPU index 0 to CUDA-based libraries in this process.
# NOTE(review): this is set after torch has been imported; presumably it still
# takes effect because CUDA has not been initialised yet — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


In [9]:
#notebook_login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

**Data**

Data - https://www.kaggle.com/datasets/saadmakhdoom/ecommerce-faq-chatbot-dataset

In [7]:
# Load the raw FAQ dataset. The file is read explicitly as UTF-8 so the result
# does not depend on the platform's default text encoding.
with open('/content/sample_data/Ecommerce_FAQ_Chatbot_dataset.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

In [9]:
# Inspect the first question/answer record, keeping the keys in their stored order.
pprint(data["questions"][0], sort_dicts=False)

{'question': 'How can I create an account?',
 'answer': "To create an account, click on the 'Sign Up' button on the top "
           'right corner of our website and follow the instructions to '
           'complete the registration process.'}


In [10]:
# Inspect the second question/answer record, keeping the keys in their stored order.
pprint(data["questions"][1], sort_dicts=False)

{'question': 'What payment methods do you accept?',
 'answer': 'We accept major credit cards, debit cards, and PayPal as payment '
           'methods for online orders.'}


In [11]:
# Write only the list of question/answer records to dataset.json.
with open("dataset.json", "w") as f:
    json.dump(data["questions"], f)

In [14]:
# Preview the first rows of the question/answer records as a table.
pd.DataFrame(data["questions"]).head()

Unnamed: 0,question,answer
0,How can I create an account?,"To create an account, click on the 'Sign Up' b..."
1,What payment methods do you accept?,"We accept major credit cards, debit cards, and..."
2,How can I track my order?,You can track your order by logging into your ...
3,What is your return policy?,Our return policy allows you to return product...
4,Can I cancel my order?,You can cancel your order if it has not been s...


**Load Falcon Model & Tokenizer**

In [None]:
# Hugging Face Hub id of the base model to fine-tune.
MODEL_NAME = 'tiiuae/falcon-7b'

# Quantization settings: 4-bit weights using the NF4 quant type with double
# quantization enabled, and bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the quantized base model. trust_remote_code=True allows custom model
# code shipped with the checkpoint to run.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)

# The rest of the notebook refers to this object as `tokenizer`.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token


In [None]:
def print_trainable_parameter(model):
  """
  Print how many of the model's parameters are trainable.

  Walks ``model.named_parameters()`` once, counting every element and the
  elements of parameters whose ``requires_grad`` is set, then prints a single
  summary line with both counts and the trainable percentage.

  Args:
    model: any object exposing ``named_parameters()`` that yields
      ``(name, parameter)`` pairs.
  """
  trainable_params = 0
  all_param = 0
  for _, param in model.named_parameters():
    all_param += param.numel()
    if param.requires_grad:
      trainable_params += param.numel()
  # Report once, after the loop; a model with no parameters reports 0.0
  # instead of dividing by zero.
  trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
  print(
      f'trainable params: {trainable_params} || all params: {all_param} || trainable: {trainable_pct}'
  )

In [None]:
# Enable gradient checkpointing, then let PEFT prepare the quantized model
# for k-bit training.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)


In [None]:
# LoRA adapter settings: rank 16, alpha 32, 5% dropout, no bias terms,
# applied to the modules named 'query_key_value'.
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=['query_key_value'],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Wrap the base model with the LoRA adapters and report the trainable share.
model = get_peft_model(model, config)
print_trainable_parameter(model)

**Inference Before Training**

In [None]:
# Prompt in the "<human>: ... / <assistant>:" layout; the assistant turn is
# left open for the model to complete.
prompt = "\n".join(
    [
        "<human>: How can I create an account?",
        "<assistant>:",
    ]
)
print(prompt)


In [None]:
# Start from the model's own generation settings and adjust them in place.
generation_config = model.generation_config
generation_config.max_new_tokens = 200
# NOTE(review): temperature/top_p presumably only take effect when sampling is
# enabled (do_sample=True), which is not set here — confirm the intended decoding mode.
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
# Pad with, and stop on, the tokenizer's end-of-sequence token.
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [None]:
# Display the generation settings configured above.
generation_config

In [None]:
%%time
device = "cuda:0"

encoding = tokenizer(prompt, return_tensor='pt').to(device)
with torch.inference_mode():
    output = model.generate(
        input_ids=encoding.input_ids,
        attention_mask=encodding.attention_mask,
        generation_config=generation_config,
    )
print(tokenizer.decode[output[0], skip_special_token=True])



**Build HF Dataset**

In [None]:
# Load dataset.json (written earlier in this notebook) with the `datasets` JSON loader.
# NOTE: this rebinds `data`, which until now held the raw parsed JSON dict.
data = load_dataset('json', data_files="dataset.json")

In [None]:
# Display the loaded dataset object.
data

**Training**

In [None]:
# Look at the first example of the "train" split.
data["train"][0]

In [None]:
def generate_prompt(data_point):
  """
  Format one question/answer record as a training prompt.

  The result is two lines, "<human>: <question>" and "<assistant>: <answer>",
  with no leading indentation on either line, so it has the same layout as the
  prompts used for inference elsewhere in this notebook.

  Args:
    data_point: mapping with "question" and "answer" keys.

  Returns:
    The formatted prompt with surrounding whitespace stripped.
  """
  # Built on one line so source indentation cannot leak into the prompt text.
  return f'<human>: {data_point["question"]}\n<assistant>: {data_point["answer"]}'.strip()

def generate_and_tokenize_prompt(data_point):
    """
    Build the training prompt for one record and tokenize it.

    Args:
        data_point: mapping with "question" and "answer" keys, as consumed by
            ``generate_prompt``.

    Returns:
        The output of the notebook-level ``tokenizer`` for the full prompt,
        called with padding and truncation enabled.
    """
    full_prompt = generate_prompt(data_point)
    tokenized_full_prompt = tokenizer(full_prompt, padding=True, truncation=True)
    return tokenized_full_prompt

In [None]:
# Shuffle the "train" split and tokenize every record into a training prompt.
# NOTE(review): `data` is rebound from the loaded dataset to its processed
# "train" split, so this cell presumably cannot be re-run without re-running
# the load cell first; the shuffle is also unseeded.
data = data["train"].shuffle().map(generate_and_tokenize_prompt)

In [None]:
# Display the shuffled, tokenized dataset.
data

In [None]:
# Directory handed to TrainingArguments as `output_dir`.
OUTPUT_DIR = "experiments"

In [None]:
%load_ext tensorboard
%tensorboard --logdirexperiments//runs

In [None]:
# Batch size 1 with 4 accumulation steps gives an effective batch of 4 per device.
# NOTE(review): both num_train_epochs and max_steps are set; max_steps
# presumably takes precedence — confirm against the TrainingArguments docs.
training_args = transformers.TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    learning_rate=2e-4,
    fp16=True,
    save_total_limit=3,
    logging_steps=1,
    output_dir=OUTPUT_DIR,
    max_steps=80,
    optim="paged_adamw_8bit",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    report_to="tensorboard",
)

trainer = transformers.Trainer(
    model=model,
    train_dataset=data,
    args=training_args,
    # mlm=False selects causal language-model collation rather than masked-LM.
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
# Turn off the model's generation cache for the training run.
model.config.use_cache = False
trainer.train()

**Save Trained Model**

In [None]:
# Save the model to the local "trained" directory.
# NOTE(review): `model` is the PEFT-wrapped model here, so this presumably
# writes only the adapter weights and config, not the base model — confirm.
model.save_pretrained("trained")

In [None]:
"""
model.push_to_hub(
    "/falcon-7b-qlora-chat-support-bot-faq", use_path_token=True
)
"""

**Load Trained Model**

In [None]:
# Location of the fine-tuned adapter.
# NOTE(review): the leading "/" suggests the Hub namespace was stripped from
# this id; set it to "<user>/falcon-7b-qlora-chat-support-bot-faq", or to the
# local "trained" directory written by the save cell — confirm which is intended.
PEFT_MODEL = '/falcon-7b-qlora-chat-support-bot-faq'

# The adapter config records which base model it was trained on.
config = PeftConfig.from_pretrained(PEFT_MODEL)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",  # same placement as the initial model load
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Attach the fine-tuned adapter to the freshly loaded base model.
model = PeftModel.from_pretrained(model, PEFT_MODEL)

**Inference**

In [None]:
# Start from the reloaded model's generation settings and adjust them in place.
generation_config = model.generation_config
generation_config.max_new_tokens = 200
# NOTE(review): temperature/top_p presumably only take effect when sampling is
# enabled (do_sample=True), which is not set here — confirm the intended decoding mode.
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
# Pad with, and stop on, the tokenizer's end-of-sequence token.
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [None]:
# Device that tokenized inference inputs are moved to.
DEVICE = 'cuda:0'


In [None]:
%%time
prompt =f"""
<human> How can I create an account?
<assistant>:
""".strip()

encoding = tokenizer(prompt, return_tensor='pt').to(DEVICE)
with torch.inference_mode():
    output = model.generate(
        input_ids=encoding.input_ids,
        attention_mask=encoding.attention_mask,
        generation_config=generation_config,
    )
print(tokenizer.decode[output[0], skip_special_token=True])

In [None]:
def generate_response(question: str) -> str:
    """Generate the fine-tuned model's answer to ``question``.

    Wraps the question in the "<human>: ... / <assistant>:" prompt layout, runs
    generation with the notebook-level ``model``, ``tokenizer``, ``DEVICE`` and
    ``generation_config``, and returns only the text that follows the first
    "<assistant>:" marker in the decoded output.

    Args:
        question: the user question to ask the model.

    Returns:
        The assistant's reply with surrounding whitespace stripped. If the
        marker is missing from the decoded text, the whole decoded text is
        returned (stripped).
    """
    prompt = f"<human>: {question}\n<assistant>:"

    encoding = tokenizer(prompt, return_tensors='pt').to(DEVICE)
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=encoding.input_ids,
            attention_mask=encoding.attention_mask,
            generation_config=generation_config,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    assistant_start = "<assistant>:"
    response_start = response.find(assistant_start)
    if response_start == -1:
        # find() returned -1: slicing from there would silently cut off the
        # start of the text, so return the whole response instead.
        return response.strip()
    return response[response_start + len(assistant_start) :].strip()

In [None]:
# Ask the fine-tuned model a question through the helper.
prompt = "Can I return a product if it was a clearance or final sale item?"
print(generate_response(prompt))


In [None]:
# Ask a second question through the helper.
prompt = "What happens when I return a clearance item?"
print(generate_response(prompt))