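# Week 7: Fine-tuning Llama with QLoRA

The cells below currently load `google/flan-t5-small` as the base model and fine-tune it with 4-bit quantization plus LoRA adapters on a small set of prompt-optimization examples.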

In [None]:
%%capture output

! pip install datasets
! pip install peft==0.4.0
! pip install bitsandbytes==0.40.2
! pip install accelerate==0.21.0
! pip install trl==0.4.7
! pip install transformers
! pip install pypdfium2
! pip install faiss-gpu
! pip install langchain
! pip install rouge

## 1. Data Loading

In [None]:
import os
import pandas as pd
from google.colab import drive

# Mount Google Drive so the project folder is reachable from the Colab runtime.
drive.mount('/content/drive')

# Root folder of the project on Drive; later cells build their paths from it.
path = '/content/drive/MyDrive/Capstone/'

# Read the sample CSV into a DataFrame.
file = os.path.join(path, 'Summarization_Sample', 'prompt_sample.csv')
df = pd.read_csv(file)

# Preview the first five rows.
df.head(5)

Mounted at /content/drive


Unnamed: 0,Prompts,Context,Optimizations
0,Summarize the company's performances and futur...,[Document(page_content='Group chief executive’...,"Summarize the company's safety performance, ke..."
1,Summarize the company's performances and futur...,"[Document(page_content='In every instance, Che...",Summarize Chevron's CEO Michael Wirth's letter...
2,Summarize the company's performances and futur...,"[Document(page_content='In 2018, we laid out a...",Summarize ExxonMobil's CEO Darren Woods' lette...
3,Summarize the company's performances and futur...,[Document(page_content=' MARATHON PETROLEUM CO...,Summarize Marathon Petroleum Corporation's CEO...
4,Summarize the company's performances and futur...,[Document(page_content='SHELL ANNUAL REPORT AN...,Summarize Shell's CEO Ben van Beurden's letter...


### Prompt reformulation

In [None]:
# Build the training text for every row by concatenating the instruction
# header, the context, the original prompt and the optimized prompt.
# The closing tag is `[/INST]` (forward slash); the previous `[\INST]` was an
# invalid escape sequence and not a tag the chat template defines.
# NOTE(review): `<<SYS>>` is never closed with `<</SYS>>`, and the target
# ([optimization]) sits inside the instruction block -- confirm this is the
# intended template.
# Rows with a missing Context / Prompts / Optimizations value produce NaN here.
df['text'] = (
    "<s>[INST] <<SYS>>Optimize the prompt below according to context. "
    + "\n[context]: " + df['Context']
    + "\n[prompt]: " + df['Prompts']
    + "\n[optimization]: " + df['Optimizations']
    + "[/INST]"
)
df.head()

Unnamed: 0,Prompts,Context,Optimizations,text
0,Summarize the company's performances and futur...,[Document(page_content='Group chief executive’...,"Summarize the company's safety performance, ke...",<s>[INST] <<SYS>>Optimize the prompt below acc...
1,Summarize the company's performances and futur...,"[Document(page_content='In every instance, Che...",Summarize Chevron's CEO Michael Wirth's letter...,<s>[INST] <<SYS>>Optimize the prompt below acc...
2,Summarize the company's performances and futur...,"[Document(page_content='In 2018, we laid out a...",Summarize ExxonMobil's CEO Darren Woods' lette...,<s>[INST] <<SYS>>Optimize the prompt below acc...
3,Summarize the company's performances and futur...,[Document(page_content=' MARATHON PETROLEUM CO...,Summarize Marathon Petroleum Corporation's CEO...,<s>[INST] <<SYS>>Optimize the prompt below acc...
4,Summarize the company's performances and futur...,[Document(page_content='SHELL ANNUAL REPORT AN...,Summarize Shell's CEO Ben van Beurden's letter...,<s>[INST] <<SYS>>Optimize the prompt below acc...


In [None]:
from datasets import Dataset

# Wrap the DataFrame in a Hugging Face `Dataset`, the format the trainer
# consumes; the bare expression on the last line displays its summary.
dataset = Dataset.from_pandas(df=df)
dataset

Dataset({
    features: ['Prompts', 'Context', 'Optimizations', 'text'],
    num_rows: 27
})

## 2. Model & Tokenizer Loading (4-bit Quantization for QLoRA)

In [None]:
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoModelForSeq2SeqLM


# Hugging Face access token, read from the HF_TOKEN environment variable so
# that no credential is stored in the notebook. It is None when the variable
# is unset, in which case only checkpoints that need no authentication can be
# downloaded.
# SECURITY: a literal token was previously committed in this cell; revoke it
# on the Hugging Face Hub and issue a new one.
access_token = os.environ.get('HF_TOKEN')

# Base checkpoint to fine-tune and the name under which the result is saved.
model_id = 'google/flan-t5-small'
new_model_id = 'flan-t5-small-sum-prompt'


# QLoRA configuration: load the base weights in 4-bit NF4 and run the
# computation in float16; double (nested) quantization is left off.
compute_dtype = torch.float16

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False
)

In [None]:
# Load the tokenizer that belongs to `model_id`.
tokenizer = AutoTokenizer.from_pretrained(model_id, token = access_token)
# Fall back to EOS as the padding token only when the tokenizer has no pad
# token of its own. Overwriting an existing pad token with EOS makes EOS
# indistinguishable from padding, so it can end up masked out of the loss.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Load the base model with the 4-bit quantization config, placing the whole
# model on device 0.
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_id,
    token = access_token,
    quantization_config=bnb_config,
    device_map = {"": 0}
)
# Turn off the generation cache on the model config for training.
model.config.use_cache = False
# NOTE(review): `pretraining_tp` looks like a Llama-specific config field;
# presumably it has no effect on other architectures -- confirm or remove.
model.config.pretraining_tp = 1

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]



generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

## 3. QLoRA Configuration

In [None]:
from peft import LoraConfig

# LoRA adapter configuration.
# The base model is loaded with AutoModelForSeq2SeqLM (encoder-decoder), so
# the PEFT task type must be SEQ_2_SEQ_LM; CAUSAL_LM would wrap it with the
# decoder-only PEFT model class.
peft_config = LoraConfig(
    r=8,                # rank of the low-rank update matrices
    lora_alpha=16,      # scaling factor applied to the LoRA update
    lora_dropout=0.1,   # dropout applied inside the LoRA layers
    bias="none",        # do not train bias terms
    task_type="SEQ_2_SEQ_LM",
)

## 4. Training Configuration

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments

# Checkpoints and logs are written under the project folder on Drive.
output_dir = os.path.join(path, 'T5_prompt_sum_tuned')

training_arguments = TrainingArguments(
    # --- where and how often to write ---
    output_dir=output_dir,
    save_steps=10,
    logging_steps=10,
    report_to="tensorboard",
    # --- run length and batching ---
    num_train_epochs=1,
    max_steps=-1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    group_by_length=True,
    # --- optimizer and learning-rate schedule ---
    optim='paged_adamw_32bit',
    learning_rate=1e-3,
    weight_decay=0.001,
    max_grad_norm=0.3,
    # NOTE(review): with the 'constant' scheduler the warmup setting appears
    # to have no effect; 'constant_with_warmup' is the variant that applies
    # it -- confirm which one is intended.
    lr_scheduler_type='constant',
    warmup_ratio=0.03,
    # --- mixed precision ---
    fp16=False,
    bf16=False, # set to true if using A100
)


# Supervised fine-tuning trainer: attaches the LoRA adapters described by
# `peft_config` to `model` and trains on the 'text' column of `dataset`.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    peft_config=peft_config,
    args=training_arguments,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=None,
    packing=False,
)



Map:   0%|          | 0/27 [00:00<?, ? examples/s]

## 5. Training and Saving

In [None]:
# Run the fine-tuning loop with the trainer configured earlier.
trainer.train()

# Save the trained model under <output_dir>/<new_model_id>.
save_path = os.path.join(output_dir, new_model_id)
trainer.model.save_pretrained(save_directory=save_path)

You are using 8-bit optimizers with a version of `bitsandbytes` < 0.41.1. It is recommended to update your version as a major bug has been fixed in 8-bit optimizers.
You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
10,0.3314




In [None]:
# Load the TensorBoard notebook extension and point it at the `runs` folder
# under the training output directory on Drive.
# NOTE(review): this path repeats the value of `output_dir` as a literal;
# keep the two in sync if the output directory changes.
%load_ext tensorboard
%tensorboard --logdir /content/drive/MyDrive/Capstone/T5_prompt_sum_tuned/runs