# LLAMA-2 QLORA Training
* To obtain a Hugging Face access token for Llama 2, go to the Hugging Face portal.


In [1]:
!huggingface-cli login --token <your token>
!echo "Options = UnsafeLegacyRenegotiation" >> /usr/lib/ssl/openssl.cnf

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [2]:
import torch
# Sanity check: the cell displays whether PyTorch can see a CUDA device.
torch.cuda.is_available()

True

In [3]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig
from trl import SFTTrainer

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Model from Hugging Face hub
base_model = "meta-llama/Llama-2-7b-chat-hf"

# New instruction dataset
id_dataset = "mlabonne/guanaco-llama2-1k"

# Fine-tuned model: name of the directory the trained artifacts are saved to.
# Only the repository part of the dataset id (after the last "/") is used, so
# the owner prefix does not turn the output path into a nested directory.
new_model = f"llama-2-7b-chat-{id_dataset.rsplit('/', 1)[-1]}"

In [5]:
# 4-bit NF4 quantization settings for loading the base model.
# float16 is passed as the 4-bit compute dtype; double quantization is off.
compute_dtype = torch.float16

quant_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    load_in_4bit=True,
)

In [6]:
# Load the base checkpoint with the quantization config defined earlier.
# The device map sends the whole model (the "" key) to device 0.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map={"": 0},
    quantization_config=quant_config,
)
# Adjust two config fields on the loaded model before training.
model.config.pretraining_tp = 1
model.config.use_cache = False

Loading checkpoint shards: 100%|██████████| 2/2 [02:01<00:00, 60.75s/it]


In [7]:
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
%env TOKENIZERS_PARALLELISM=false

env: TOKENIZERS_PARALLELISM=false


In [8]:
# Load the "train" split of the dataset identified by `id_dataset`.
dataset = load_dataset(id_dataset, split="train")

In [10]:
# LoRA adapter settings for causal-LM fine-tuning:
# rank 64, alpha 16, 10% dropout, and no bias parameters trained.
peft_params = LoraConfig(
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)

## Training parameters
Below is a list of hyperparameters that can be used to optimize the training process:

    output_dir: The output directory is where the model predictions and checkpoints will be stored.
    num_train_epochs: Number of training epochs (one epoch is used here).
    fp16/bf16: Enable fp16/bf16 mixed-precision training (both are disabled here).
    per_device_train_batch_size: Batch size per GPU for training.
    per_device_eval_batch_size: Batch size per GPU for evaluation.
    gradient_accumulation_steps: Number of steps over which gradients are accumulated before each optimizer update.
    gradient_checkpointing: Enabling gradient checkpointing.
    max_grad_norm: Gradient clipping.
    learning_rate: Initial learning rate.
    weight_decay: Weight decay is applied to all layers except bias/LayerNorm weights.
    optim: Model optimizer (AdamW optimizer).
    lr_scheduler_type: Learning rate schedule.
    max_steps: Number of training steps.
    warmup_ratio: Ratio of steps for a linear warmup.
    group_by_length: This can significantly improve performance and accelerate the training process.
    save_steps: Save a checkpoint every 100 update steps.
    logging_steps: Log every 100 update steps.

In [11]:
# Hyperparameters for the fine-tuning run, grouped by purpose.
training_params = TrainingArguments(
    # Output location and checkpoint/log cadence.
    output_dir="./results",
    save_steps=100,
    logging_steps=100,
    # Run length: one epoch; max_steps=-1 leaves the step count uncapped.
    num_train_epochs=1,
    max_steps=-1,
    # Batching.
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    group_by_length=True,
    # Optimizer and learning-rate schedule.
    # NOTE(review): with lr_scheduler_type="constant" the warmup_ratio below
    # appears to have no effect ("constant_with_warmup" is the variant that
    # uses it) - confirm which behavior is intended.
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    weight_decay=0.001,
    max_grad_norm=0.3,
    # Mixed-precision training is switched off for both half-precision formats.
    fp16=False,
    bf16=False,
)

In [12]:
# Supervised fine-tuning trainer. It ties together the quantized model, the
# LoRA settings, the tokenizer, and the training arguments defined earlier.
# Samples are read from the dataset's "text" field and are not packed.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_params,
    peft_config=peft_params,
    train_dataset=dataset,
    dataset_text_field="text",
    packing=False,
    max_seq_length=None,
)



In [13]:
# Delete ./results (the output_dir given to TrainingArguments) so the
# training run starts with an empty output folder.
!rm -rf ./results

In [None]:
# Run the fine-tuning loop with the SFTTrainer configured earlier.
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss


In [None]:
# Save the trainer's model and tokenizer into the `new_model` directory.
# NOTE(review): the trainer was built with a peft_config, so this presumably
# writes only the LoRA adapter weights rather than a full model - confirm.
trainer.model.save_pretrained(new_model)
trainer.tokenizer.save_pretrained(new_model)


## Test

The prompt template is:

```
<s>[INST] <<SYS>>
{{ system_prompt }}
<</SYS>>

{{ user_msg_1 }} [/INST] {{ model_answer_1 }} </s><s>[INST] {{ user_msg_2 }} [/INST]
```


In [None]:
# Raise the transformers logging threshold to CRITICAL before generating.
logging.set_verbosity(logging.CRITICAL)

# Ask the fine-tuned model (trainer.model) a single question, wrapped in
# [INST] tags, and print the generated text (max_length=200).
prompt = "What is the Breaking Bad?"
pipe = pipeline(
    task="text-generation",
    model=trainer.model,
    tokenizer=tokenizer,
    max_length=200,
)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]["generated_text"])

In [None]:
# Send the same `prompt` (defined in the previous cell) through `model`.
# NOTE(review): `model` was also handed to SFTTrainer; check whether it still
# represents the un-tuned base model when this cell runs.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=200,
)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]["generated_text"])