### **Fine-tuning framework**: LoRA  
### **Dataset**: QA pairs generated by GPT-3.5 from Chevron's 2018 annual report
### **Base model**: Llama 2 (`meta-llama/Llama-2-7b-chat-hf`)


In [None]:
%%capture output
# Install the libraries used by this notebook. `%%capture output` keeps pip's
# log out of the rendered notebook and stores it in the variable `output`.
# NOTE(review): peft, bitsandbytes, accelerate and trl are pinned, but datasets,
# transformers, pypdfium2, faiss-gpu, langchain and rouge are not, so a fresh
# run may resolve different versions -- consider pinning them as well.

! pip install datasets
! pip install peft==0.4.0
! pip install bitsandbytes==0.40.2
! pip install accelerate==0.21.0
! pip install trl==0.4.7
! pip install transformers
! pip install pypdfium2
! pip install faiss-gpu
! pip install langchain
! pip install rouge

In [None]:
# from huggingface_hub import notebook_login
from peft import get_peft_config, get_peft_model, get_peft_model_state_dict, LoraConfig, TaskType
from trl import SFTTrainer

## Load the training dataset

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive (this cell only works inside Colab).
drive.mount('/content/drive')
# Project root on Drive; the training output directory is built from it later.
path = '/content/drive/MyDrive/Capstone/'


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd

# Load the generated QA pairs; the columns read later are Questions, Context and Answers.
# NOTE(review): this literal repeats the Drive root already stored in `path`.
df = pd.read_csv('/content/drive/MyDrive/Capstone/QA_Sample/Chevron_2018.csv')
# Preview the first five rows.
df.head(5)

Unnamed: 0,Questions,Context,Answers
0,What is Chevron Corporation's address in San R...,2018 \nannual report\n140 years of human p...,"Chevron Corporation's address in San Ramon, CA..."
1,How many metric tons of carbon dioxide are exp...,2018 \nannual report\n140 years of human p...,"Over the life of the Gorgon facility, it is ex..."
2,What are some of the competitive advantages of...,2018 \nannual report\n140 years of human p...,Chevron Corporation has several competitive ad...
3,What is the purpose of the Gorgon Carbon Dioxi...,2018 \nannual report\n140 years of human p...,The purpose of the Gorgon Carbon Dioxide Injec...
4,How does Chevron Corporation contribute to red...,2018 \nannual report\n140 years of human p...,Chevron Corporation contributes to reducing gr...


In [None]:
# Build one training string per row in the Llama-2 chat layout:
#   <s>[INST] <<SYS>>\n{system prompt}\n<</SYS>>\n\n{user turn} [/INST] {answer} </s>
# The context and question form the user turn; the answer is placed AFTER the
# closing [/INST] tag so the model is trained to produce it as the reply.
# Note that the closing tag is "[/INST]" (forward slash) and that the system
# block must be closed with "<</SYS>>".
# NOTE(review): prompts built at inference time must follow the same layout and
# stop right after "[/INST]".
df['text'] = (
    "<s>[INST] <<SYS>>\n"
    "Answer the question based on the context below.\n"
    "<</SYS>>\n\n"
    "[context]: " + df['Context']
    + "\n[question]: " + df['Questions']
    + " [/INST] " + df['Answers']
    + " </s>"
)
df.head()

Unnamed: 0,Questions,Context,Answers,text
0,What is Chevron Corporation's address in San R...,2018 \nannual report\n140 years of human p...,"Chevron Corporation's address in San Ramon, CA...",<s>[INST] <<SYS>>Answer the question based on ...
1,How many metric tons of carbon dioxide are exp...,2018 \nannual report\n140 years of human p...,"Over the life of the Gorgon facility, it is ex...",<s>[INST] <<SYS>>Answer the question based on ...
2,What are some of the competitive advantages of...,2018 \nannual report\n140 years of human p...,Chevron Corporation has several competitive ad...,<s>[INST] <<SYS>>Answer the question based on ...
3,What is the purpose of the Gorgon Carbon Dioxi...,2018 \nannual report\n140 years of human p...,The purpose of the Gorgon Carbon Dioxide Injec...,<s>[INST] <<SYS>>Answer the question based on ...
4,How does Chevron Corporation contribute to red...,2018 \nannual report\n140 years of human p...,Chevron Corporation contributes to reducing gr...,<s>[INST] <<SYS>>Answer the question based on ...


In [None]:
# Number of QA pairs loaded (sanity check on the dataset size).
len(df)

587

In [None]:
# NOTE(review): `load_dataset` is imported but not used in the cells shown here.
from datasets import load_dataset
from datasets import Dataset
# Wrap the DataFrame as a Hugging Face Dataset so it can be split and mapped.
dataset = Dataset.from_pandas(df)
dataset

Dataset({
    features: ['Questions', 'Context', 'Answers', 'text'],
    num_rows: 587
})

In [None]:
# Hold out 10% of the rows for testing. The seed is fixed so the same rows land
# in train/test on every run; without it the split (and any metric computed on
# it) changes each time the notebook is executed.
# NOTE(review): this rebinds `dataset` from a Dataset to a DatasetDict, so the
# cell cannot be re-run without first re-running the cell that creates `dataset`.
dataset = dataset.train_test_split(test_size=0.1, seed=42)

In [None]:
# Display the train/test split sizes.
dataset

DatasetDict({
    train: Dataset({
        features: ['Questions', 'Context', 'Answers', 'text'],
        num_rows: 528
    })
    test: Dataset({
        features: ['Questions', 'Context', 'Answers', 'text'],
        num_rows: 59
    })
})

## Tokenize the data

In [None]:
import os
from getpass import getpass

from transformers import AutoTokenizer

# SECURITY: never hardcode a Hugging Face access token in the notebook. Read it
# from the HF_TOKEN environment variable, or prompt for it interactively when the
# variable is not set. Any token that was previously committed in this cell must
# be treated as leaked and revoked at https://huggingface.co/settings/tokens.
access_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")

# Base checkpoint to fine-tune and the name under which the adapter is saved.
model_id = 'meta-llama/Llama-2-7b-chat-hf'
new_model_id = 'llama-2-7b-qa-lora'

tokenizer = AutoTokenizer.from_pretrained(model_id, token = access_token)
# Reuse the end-of-sequence token for padding and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [None]:
def preprocess_function(sample, max_length=2048):
    """Tokenize a single QA example (context + question + answer).

    Written for ``Dataset.map(..., batched=True, batch_size=1)``: ``sample`` maps
    each column name to a list, and only element ``[0]`` of each list is read.
    With a larger batch size the remaining rows would be ignored.

    Args:
        sample: Batch dict holding ``'Context'``, ``'Questions'`` and ``'Answers'``
            lists of strings.
        max_length: Maximum number of tokens kept per example. Longer inputs are
            truncated from the left, so the end of the text (the answer) is kept.

    Returns:
        The output of the module-level ``tokenizer`` for the concatenated text,
        requested as NumPy arrays (``return_tensors="np"``).

    Side effects:
        Sets ``tokenizer.truncation_side = "left"`` on the shared tokenizer; the
        setting stays in effect for any later use of that tokenizer.
    """
    # The three fields are joined directly, with no separator or prompt template.
    text = sample['Context'][0] + sample['Questions'][0] + sample['Answers'][0]

    # One pass is enough: truncating at `max_length` leaves shorter inputs
    # untouched, so measuring the length with a separate tokenizer call first
    # (and capping at min(length, max_length)) gives the same result.
    tokenizer.truncation_side = "left"
    return tokenizer(
        text,
        return_tensors="np",
        truncation=True,
        max_length=max_length,
    )


In [None]:
# Tokenize both splits with identical settings. Examples are mapped one at a
# time (batch_size=1) because preprocess_function only reads element [0] of
# each batched column.
tokenized_train_dataset, tokenized_test_dataset = (
    dataset[split_name].map(
        preprocess_function,
        batched=True,
        batch_size=1,
        drop_last_batch=True,
    )
    for split_name in ('train', 'test')
)

Map:   0%|          | 0/528 [00:00<?, ? examples/s]

Map:   0%|          | 0/59 [00:00<?, ? examples/s]

In [None]:
# Confirm that map() added the input_ids / attention_mask columns.
print(tokenized_train_dataset)

Dataset({
    features: ['Questions', 'Context', 'Answers', 'text', 'input_ids', 'attention_mask'],
    num_rows: 528
})


## Load the model

In [None]:
from transformers import (
    Trainer,
    AutoModel,
    AutoModelForCausalLM,
    AutoModelForQuestionAnswering,
    AutoTokenizer,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
    AutoModelForSeq2SeqLM,
    BitsAndBytesConfig,
    TrainingArguments,
    get_linear_schedule_with_warmup,
    default_data_collator,
    DataCollatorForSeq2Seq,

)


In [None]:
import torch
# NOTE(review): HuggingFacePipeline is imported but not used in the cells shown here.
from langchain.llms import HuggingFacePipeline
# Leftover, commented-out list of candidate checkpoints (not executed):
# from transformers import AutoModelForQuestionAnswering
# model_name =[ "meta-llama/Llama-2-7b-chat-hf",
#              "TheBoke/Llama-2-13B-chat-GGML",
#              "mistralai/Mistral-7B-Instruct-v0.1",
#               "google/flan-t5-large",
#               "google/flan-t5-base",
#               "EletherAI/pythia-70m",
#               "EleutherAI/gpt-neo-125m"]
# model = AutoModelForCausalLM.from_pretrained(model_name[0])
# model = AutoModelForCausalLM.from_pretrained(model_name[1])
# Load base model
# Quantization settings handed to from_pretrained: 4-bit weights, NF4
# quantization type, float16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.float16,
)
# Load the checkpoint named by `model_id` with the quantization config above,
# authenticating with `access_token`.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    use_safetensors=True,
    token = access_token,
    quantization_config=bnb_config,
    # NOTE(review): trust_remote_code=True permits running code shipped with the
    # model repository -- confirm it is actually required for this checkpoint.
    trust_remote_code=True,
    # presumably places the entire model on GPU 0 -- TODO confirm
    device_map ={"": 0},
)
# Turn off the generation key/value cache on the model config for training.
model.config.use_cache = False
# NOTE(review): presumably selects the standard (non tensor-parallel) linear
# computation in the Llama implementation -- confirm against the config docs.
model.config.pretraining_tp = 1


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



## Training

### Training config

In [None]:
import os
# Checkpoints and logs go to a "Llama_tuned" folder under the Drive project root.
output_dir = os.path.join(path, 'Llama_tuned')

# Hyperparameters for the fine-tuning run.
# NOTE(review): the recorded run of this notebook ended in OutOfMemoryError;
# per_device_train_batch_size is one of the knobs that affects memory use.
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    optim='paged_adamw_32bit',
    # Save a checkpoint and log metrics every 25 optimizer steps.
    save_steps=25,
    logging_steps=25,
    # NOTE(review): 1e-3 is high compared with values commonly used for LoRA
    # fine-tuning (e.g. 2e-4) -- confirm this is intended.
    learning_rate=1e-3,
    weight_decay=0.001,
    # Both mixed-precision flags are off.
    fp16=False,
    bf16=False, # set to true if using A100
    max_grad_norm=0.3,
    # -1 leaves the run length to num_train_epochs.
    max_steps=-1,
    # NOTE(review): with lr_scheduler_type='constant' the warmup ratio presumably
    # has no effect ('constant_with_warmup' is the variant with warmup) -- confirm.
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type='constant',
    report_to="tensorboard"
)

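### LoRA config
lora_r: rank of the low-rank decomposition matrices (common values: 4, 8, 16, 32; this notebook uses 64) \\
lora_alpha: scaling constant applied to the LoRA update \\
target_modules: attention projections to adapt — **query (q), value (v)**, key (k), output (o); bold marks the ones used here \\
task_type: one of CAUSAL_LM, FEATURE_EXTRACTION, QUESTION_ANS, SEQ_2_SEQ_LM, SEQ_CLS, TOKEN_CLS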



In [None]:
# Lookup from model architecture name to the names of the modules that LoRA
# adapters are attached to. Only the "llama" entry is read in this notebook.
# NOTE(review): this looks like a copy of the mapping of the same name shipped
# with peft -- consider importing it instead of duplicating it; confirm.
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING = {
    "t5": ["q", "v"],
    "mt5": ["q", "v"],
    "bart": ["q_proj", "v_proj"],
    "gpt2": ["c_attn"],
    "bloom": ["query_key_value"],
    "blip-2": ["q", "v", "q_proj", "v_proj"],
    "opt": ["q_proj", "v_proj"],
    "gptj": ["q_proj", "v_proj"],
    "gpt_neox": ["query_key_value"],
    "gpt_neo": ["q_proj", "v_proj"],
    "bert": ["query", "value"],
    "roberta": ["query", "value"],
    "xlm-roberta": ["query", "value"],
    "electra": ["query", "value"],
    "deberta-v2": ["query_proj", "value_proj"],
    "deberta": ["in_proj"],
    "layoutlm": ["query", "value"],
    "llama": ["q_proj", "v_proj"],
    "chatglm": ["query_key_value"],
    "gpt_bigcode": ["c_attn"],
    "mpt": ["Wqkv"],
}

In [None]:
# LoRA hyperparameters: rank of the decomposition matrices, scaling constant,
# and dropout applied inside the adapter.
lora_r = 64
lora_alpha = 64
lora_dropout = 0.1
# Explicit target list kept for reference; the mapping entry for "llama" is
# used instead and contains the same two module names.
# lora_target_modules = [
#     "q_proj",
#     "v_proj",
# ]


# Adapter configuration for causal language modelling: adapt the query and
# value projections, and do not train any bias terms.
peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    target_modules=TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING['llama'],
    bias="none",
    task_type="CAUSAL_LM",
)

In [None]:
# Print the module tree to check that the q_proj / v_proj target modules exist.
print(model)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm

In [None]:
# Attach the LoRA adapters described by `peft_config` to the loaded model.
# NOTE(review): `model` is rebound here, so re-running this cell passes the
# already-wrapped model to get_peft_model again.
model = get_peft_model(model, peft_config)
# Report trainable vs. total parameter counts.
model.print_trainable_parameters()

trainable params: 33,554,432 || all params: 3,533,967,360 || trainable%: 0.9494833591219133


In [None]:
# Show the adapter configuration now attached to the model.
model.peft_config

{'default': LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path='meta-llama/Llama-2-7b-chat-hf', revision=None, task_type='CAUSAL_LM', inference_mode=False, r=64, target_modules=['q_proj', 'v_proj'], lora_alpha=64, lora_dropout=0.1, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None)}

In [None]:
# Print the wrapped model to check that lora_A / lora_B modules were injected.
print(model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): Linear4bit(
                in_features=4096, out_features=4096, bias=False
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=64, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=64, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
              (v_proj): Linear

### Use SFTTrainer to train the model

In [None]:
# Release cached, currently unused GPU memory held by PyTorch before training.
torch.cuda.empty_cache()

In [None]:
# Supervised fine-tuning on the prompt strings stored in the "text" column of
# the training split. The tokenized_* datasets built earlier are not passed
# here, and dataset['test'] is not supplied as an evaluation set.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset['train'],
    # NOTE(review): `model` was already wrapped by get_peft_model in an earlier
    # cell and the same peft_config is passed again -- confirm this is intended.
    peft_config=peft_config,
    dataset_text_field="text",
    # NOTE(review): 4096 is twice the 2048-token cap used in preprocess_function,
    # and the recorded run ended in OutOfMemoryError -- lowering this is a likely
    # first mitigation; confirm.
    max_seq_length=4096,
    tokenizer=tokenizer,
    args=training_arguments,
    # One example per sequence; examples are not packed together.
    packing=False,
)

Map:   0%|          | 0/528 [00:00<?, ? examples/s]

In [None]:
# Run fine-tuning with the arguments configured above.
# NOTE(review): in the recorded output this cell ended with OutOfMemoryError.
trainer.train()

# Save the trained model under <output_dir>/<new_model_id>.
# presumably only the adapter weights are written for a PEFT-wrapped model -- TODO confirm
save_path = os.path.join(output_dir, new_model_id)
trainer.model.save_pretrained(save_path)

You are using 8-bit optimizers with a version of `bitsandbytes` < 0.41.1. It is recommended to update your version as a major bug has been fixed in 8-bit optimizers.
You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


OutOfMemoryError: ignored