In [2]:
%pip install torch wandb --quiet

# Install Hugging Face libraries
%pip install  -U transformers datasets accelerate evaluate bitsandbytes --quiet
%pip install peft --quiet
%pip install datasets trl ninja packaging --quiet
%pip install diffusers safetensors  --quiet
#FlashAttention only supports Ampere GPUs or newer. #NEED A100 IN GOOGLE COLAB
#%pip install -U flash-attn --no-build-isolation --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m267.1/267.1 kB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.8/8.8 MB[0m [31m34.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m45.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.4/297.4 kB[0m [31m34.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━

In [1]:
import os

import torch
import wandb
from datasets import load_dataset
from huggingface_hub import login
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)

from peft import LoraConfig
from trl import SFTTrainer, setup_chat_format

RuntimeError: Failed to import transformers.training_args because of the following error (look up to see its traceback):
No module named 'transformers.integrations'

# Logging & Env Setup

In [5]:
# set up wandb environment variables
os.environ["WANDB_PROJECT"] = ""
os.environ["WANDB_LOG_MODEL"] = ""
os.environ["WANDB_KEY"] = ""
os.environ["HUGGINGFACE_ACCESS_TOKEN"] = ""

In [6]:
# hugging face login
login(token=os.getenv("HUGGINGFACE_ACCESS_TOKEN"))
# wandb login
wandb.login(key=os.getenv("WANDB_KEY"))

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

# Load Dataset

In [13]:
from datasets import load_dataset
train_dataset = load_dataset(
    path="../data/abstract_dataset/", split="train"
)
test_dataset = load_dataset(path="../data/abstract_dataset", split="test")

print("Length of train dataset: ", len(train_dataset))
print("Length of test dataset: ", len(test_dataset))

Length of train dataset:  12452
Length of test dataset:  3113


In [None]:
model_name = "mistralai/Mistral-7B-Instruct-v0.2"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    # attn_implementation="flash_attention_2",  # uncomment if you have an Ampere GPU
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config
)

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
tokenizer.padding_side = 'right' 
tokenizer.pad_token = tokenizer.unk_token
tokenizer.pad_token_id = tokenizer.unk_token_id

# set chat template to OAI chatML, remove if you start from a fine-tuned model
model, tokenizer = setup_chat_format(model, tokenizer)

# Prepare for Training

In [12]:
training_args = TrainingArguments(
    output_dir="models/mistral-7b-instruct-v0.2-lora",
    num_train_epochs=1,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    optim="adamw_torch",
    logging_steps=10,
    max_steps=10,
    save_strategy="steps",
    evaluation_strategy="steps",
    learning_rate=2e-4,
    fp16=True,                   
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
    push_to_hub=False,
    report_to="wandb",
    run_name="mistral-7b-instruct-v0.2-lora",
    load_best_model_at_end=True,
    auto_find_batch_size=True
)

# Set up Lora configuration
peft_config = LoraConfig(
    lora_alpha=128,
    lora_dropout=0.05,
    r=256,
    bias="none",
    target_modules=["q_proj", "v_proj", "all_linear"],
    task_type="CAUSAL_LM",
)

In [21]:
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    peft_config=peft_config,
    tokenizer=tokenizer,
    dataset_kwargs={"add_special_tokens": False, "append_concat_token": False}
)



Map:   0%|          | 0/312 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [22]:
trainer.train()
trainer.save_model()

INFO:info:Starting training process...


Step,Training Loss,Validation Loss
10,61.6191,16.348982


INFO:info:Training completed.
INFO:info:Model saved successfully.
