<a href="https://colab.research.google.com/github/vnguyen2011/VietAI-NTI_ChatGPTStreamlit/blob/main/%5BVietAI%5D_Finetune_llama2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Finetune Model

In [None]:
# Install AutoTrain and the libraries the fine-tuning run relies on.
# This takes roughly 5-10 minutes on a fresh runtime.
# %pip (instead of !pip) installs into the environment of the running kernel.
%pip install -q autotrain-advanced
!autotrain setup --update-torch
%pip install --upgrade accelerate
# The notebook imports `datasets` (Hugging Face); the PyPI project named
# `dataset` is a different package and does not provide `load_dataset`.
%pip install --upgrade datasets

In [None]:
# Log in to your Hugging Face account.
# Create an access token at https://huggingface.co/settings/tokens
import os
from getpass import getpass

from huggingface_hub import login

# Take the token from the HF_TOKEN environment variable when it is set,
# otherwise prompt for it. Either way the secret is never written into the
# notebook, so it cannot leak when the notebook is shared or committed.
HF_TOKEN = (os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")).strip()

login(HF_TOKEN)

In [None]:
# (Optional) Load the training data so it can be inspected before fine-tuning
from datasets import load_dataset

dataset = load_dataset(path="HoangHa/hello")
dataset  # last expression of the cell, so the notebook displays it

In [None]:
# Fine-tune Llama-2-7B on the VietAI QA dataset (about 20 minutes).
# AutoTrain reads the training text from the column named by --text_column.
# The token is passed with IPython brace expansion so the CLI receives the
# value of the Python variable HF_TOKEN; a bare `HF_TOKEN` would be handed to
# the command as that literal text and authentication would fail.
!autotrain llm \
--train \
--project_name llama2-VietAI \
--model meta-llama/Llama-2-7b-hf \
--data_path HoangHa/hello \
--text_column text \
--repo_id HoangHa/llama2-VietAI \
--token "{HF_TOKEN}" \
--trainer sft \
--learning_rate 2e-4 \
--num_train_epochs 3 \
--push_to_hub \
--gradient-accumulation 16 \
--use_peft \
--use_int4 \
--lora_r 16 \
--lora_alpha 32 \
--lora_dropout 0.05 \
--target_modules q_proj,v_proj \
--train_batch_size 8 \
--block_size 256
# Optional flags (append them to the command above, adding a trailing
# backslash to the line that precedes them):
# --merge_adapter # If you want to merge the trained adapter into your model
# --use_flash_attention_2 # If you have a suitable GPU (e.g. A100)


# Test model

**NOTE**: If inference fails with an out-of-memory error, restart the notebook runtime, re-run the login cell (so that `HF_TOKEN` is defined again), and then run the cells below.

In [None]:
# Libraries needed for 4-bit inference.
# %pip (instead of !pip) installs into the environment of the running kernel.
%pip install --upgrade transformers datasets accelerate peft bitsandbytes

In [None]:
import transformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# 4-bit quantization settings reused for both models loaded in this cell:
# NF4 quantization type, double quantization, bfloat16 compute dtype.
bnb_config = transformers.BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Finetuned model
# 4-bit loading is requested through `quantization_config` alone: bnb_config
# already sets load_in_4bit=True, and recent transformers releases raise a
# ValueError when `load_in_4bit` is passed alongside `quantization_config`.
# NOTE(review): training pushes to "HoangHa/llama2-VietAI" while this loads
# "HoangHa/llama2-VietAI-merged" — presumably a separately merged upload; confirm.
model_finetuned = AutoModelForCausalLM.from_pretrained(
    "HoangHa/llama2-VietAI-merged",
    device_map="auto",
    quantization_config=bnb_config,
    token=HF_TOKEN,
)

# Tokenizer, loaded from the base checkpoint and configured to pad on the left
tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    token=HF_TOKEN,
    padding_side="left",
)
# Most LLMs ship without a pad token, so the EOS token is reused for padding
tokenizer.pad_token = tokenizer.eos_token

# Text-generation pipeline wrapping the finetuned model
# NOTE(review): `temperature` is set without `do_sample=True`; presumably
# decoding is then greedy and the temperature has no effect — confirm intent.
generate_finetuned = pipeline(
    'text-generation',
    model=model_finetuned,
    tokenizer=tokenizer,
    max_new_tokens=64,
    temperature=0.1,
    repetition_penalty=1.1,
    return_full_text=True,
)

# ------------------------------------------
# Base model
# 4-bit loading is requested through `quantization_config` alone: bnb_config
# already sets load_in_4bit=True, and recent transformers releases raise a
# ValueError when `load_in_4bit` is passed alongside `quantization_config`.
model_base = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    device_map="auto",
    quantization_config=bnb_config,
    token=HF_TOKEN,
)

# Text-generation pipeline wrapping the base model (same tokenizer as above)
# NOTE(review): this pipeline allows 128 new tokens while the finetuned one
# allows 64 — confirm the difference is intended for the comparison.
# NOTE(review): `temperature` is set without `do_sample=True`; presumably
# decoding is then greedy and the temperature has no effect — confirm intent.
generate_base = pipeline(
    'text-generation',
    model=model_base,
    tokenizer=tokenizer,
    max_new_tokens=128,
    temperature=0.1,
    repetition_penalty=1.1,
    return_full_text=True,
)

# Output: ask both models the same question and print each answer
input_text = """
VietAI dạy những môn cơ bản nào?
"""

print("Finetuned Model:")
res = generate_finetuned(input_text)
print(res[0]['generated_text'])

print("Base model:")
res = generate_base(input_text)
print(res[0]['generated_text'])