### Setup and Configuration

In [None]:
# pip installs

%pip install -q --upgrade torch==2.5.1+cu124 torchvision==0.20.1+cu124 torchaudio==2.5.1+cu124 --index-url https://download.pytorch.org/whl/cu124
%pip install -q --upgrade requests==2.32.4 bitsandbytes==0.46.0 transformers==4.48.3 accelerate==1.3.0 peft==0.14.0 trl==0.14.0 fsspec==2025.3.0 matplotlib wandb datasets

print("Installation successfull.")

In [None]:
# imports

import math
import os
import re
from datetime import datetime

import matplotlib.pyplot as plt
import torch
import transformers
import wandb
from datasets import load_dataset, Dataset, DatasetDict
from google.colab import userdata
from huggingface_hub import login
from peft import LoraConfig
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, set_seed, BitsAndBytesConfig
from trl import SFTTrainer, SFTConfig
from trl import DataCollatorForCompletionOnlyLM

In [None]:
# Project constants
HF_USER = "Vishy08"
PROJECT_NAME = "product-pricer"

# Each execution gets a timestamped name (day-month-year_hour.minute.second);
# it is reused below for the training output directory and the Hub model id.
RUN_NAME = datetime.now().strftime("%d-%m-%Y_%H.%M.%S")
PROJECT_RUN_NAME = "-".join((PROJECT_NAME, RUN_NAME))
HUB_MODEL_NAME = "/".join((HF_USER, PROJECT_RUN_NAME))


In [None]:
# Hugging Face login
# The token is fetched from Colab's userdata store rather than written into the notebook.
hf_token = userdata.get("HF_TOKEN")
login(token=hf_token, add_to_git_credential=True)

In [None]:
# Weights & Biases setup
LOG_TO_WANDB = True

# Read the API key from Colab's userdata store and expose it through the
# environment before logging in, so no key is written into the notebook.
wandb_api_key = userdata.get('WANDB_API_KEY')
os.environ['WANDB_API_KEY'] = wandb_api_key
wandb.login()

# Reuse PROJECT_NAME instead of repeating the literal so the W&B project and the
# run / Hub naming derived from PROJECT_NAME cannot drift apart.
os.environ["WANDB_PROJECT"] = PROJECT_NAME
# NOTE(review): when LOG_TO_WANDB is False this still selects "end" rather than
# disabling model logging — confirm whether "false" was intended for that case.
os.environ["WANDB_LOG_MODEL"] = "checkpoint" if LOG_TO_WANDB else "end"
os.environ["WANDB_WATCH"] = "gradients"

In [None]:
# QLoRA parameters

# Selects the 4-bit quantization config when True, otherwise the 8-bit one.
QUANT_4_BIT = True

# Values forwarded to LoraConfig (r, lora_alpha, lora_dropout, target_modules).
LORA_R = 32
LORA_ALPHA = 64
LORA_DROPOUT = 0.1
TARGET_MODULES = [
    "q_proj",
    "v_proj",
    "k_proj",
    "o_proj",
]

In [None]:
# Training hyperparameters
# NOTE(review): the SFTConfig cell sets its own batch size (12) and gradient
# accumulation (2) and does not pass a scheduler, warmup ratio or optimizer, so
# BATCH_SIZE, GRADIENT_ACCUMULATION_STEPS, LR_SCHEDULER_TYPE, WARMUP_RATIO and
# OPTIMIZER are not applied as written — confirm which values are intended.

# Run length and batching
EPOCHS = 1
BATCH_SIZE = 16
GRADIENT_ACCUMULATION_STEPS = 1

# Optimisation
LEARNING_RATE = 0.0001
LR_SCHEDULER_TYPE = "cosine"
WARMUP_RATIO = 0.03
OPTIMIZER = "paged_adamw_32bit"

In [None]:
# Step-interval constants.
# NOTE(review): the SFTConfig cell currently hard-codes save_steps=500, which does
# not match SAVE_STEPS — confirm which value is intended.
STEPS = 50
SAVE_STEPS = 2000
# LOG_TO_WANDB is intentionally not repeated here: it is already defined in the
# W&B setup cell, and a second assignment could silently diverge from it.

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

### Data Pipeline

In [None]:
# Load the pricing dataset; its 'train' and 'val' splits are picked out in the next cell.
dataset = load_dataset(path="Vishy08/pricer-data")

In [None]:
# Train on a seeded random subset of the training split; validate on the whole 'val' split.
TRAIN_SUBSET_SIZE = 100_000  # upper bound on the number of training examples
SHUFFLE_SEED = 42            # fixed seed so the same subset is drawn on every run

train_split = dataset['train']
# Clamp to the split size: selecting indices past the end of the split would raise.
subset_size = min(TRAIN_SUBSET_SIZE, len(train_split))
train_subset = train_split.shuffle(seed=SHUFFLE_SEED).select(range(subset_size))
val_dataset = dataset['val']

print(f"Train on: {len(train_subset)}")
print(f"Val on: {len(val_dataset)}")

In [None]:
from trl import DataCollatorForCompletionOnlyLM
response_template = "Price is $"
collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)

### Model and Tokenizer Initialization

In [None]:

QUANT_4_BIT = True

if QUANT_4_BIT:
    quant_config = BitsAndBytesConfig(
        load_in_4bit = True ,
        bnb_4bit_use_double_quant = True ,
        bnb_4bit_compute_dtype = torch.bfloat16 ,
        bnb_4bit_quant_type = 'nf4' ,
    )
else :
    quant_config = BitsAndBytesConfig(
        load_in_8bit = True ,
        bnb_8bit_compute_dtype = torch.bfloat16,
    )

In [None]:

#tokenizer
BASE_MODEL = 'meta-llama/Llama-3.1-8B-Instruct'
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL , trust_remote_code = True )
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL  ,
    quantization_config = quant_config ,
    device_map = 'auto'
)
base_model.generation_config.pad_token_id = tokenizer.pad_token_id

print(base_model)

### Training

#### Restarting from a checkpoint (due to a Colab crash midway)

In [None]:
# Re-attach to the interrupted W&B run and fetch the checkpoint artifact it logged.
# The identifiers are named once here instead of being repeated inside string literals.
WANDB_ENTITY = "llm_engineering"
RESUME_RUN_ID = "nxo7668d"
CHECKPOINT_VERSION = "v2"

run = wandb.init(
    project=PROJECT_NAME,
    resume="allow",
    id=RESUME_RUN_ID,
)

artifact = run.use_artifact(
    f"{WANDB_ENTITY}/{PROJECT_NAME}/model-{RESUME_RUN_ID}:{CHECKPOINT_VERSION}",
    type="model",
)
# download() returns the local directory the artifact files were written to.
artifact_dir = artifact.download()

In [None]:
# Path to resume training from: use the directory returned by artifact.download()
# instead of a hard-coded /content/... path, so it stays correct if the working
# directory, run id or artifact version changes.
checkpoint_path = artifact_dir

#### Starting the training run

In [None]:
# LoRA Config — every value comes from the QLoRA parameters cell.
lora_parameters = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    target_modules=TARGET_MODULES,
    bias="none",
    task_type="CAUSAL_LM",
)

# SFT Config
train_parameters = SFTConfig(
    output_dir=PROJECT_RUN_NAME,

    # Schedule: taken from the config cells where the values agree.
    num_train_epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    logging_steps=STEPS,

    # NOTE(review): these differ from BATCH_SIZE (16) and
    # GRADIENT_ACCUMULATION_STEPS (1) in the hyperparameter cell. They are kept
    # as literals so the run being resumed is not altered — confirm which values
    # are intended before wiring them to the constants.
    per_device_train_batch_size=12,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,

    report_to='wandb',

    # Validation settings
    eval_strategy="steps",
    eval_steps=200,
    do_eval=True,

    # Dataset settings: train on the "prompt" column, without packing.
    max_seq_length=1024,
    dataset_text_field="prompt",
    packing=False,

    # Hub / saving
    # NOTE(review): SAVE_STEPS is 2000 in the config cell but 500 is used here.
    save_steps=500,
    push_to_hub=True,
    hub_model_id=HUB_MODEL_NAME,
    hub_private_repo=True,
)

In [None]:
# Trainer initialization
fine_tuning = SFTTrainer(
    model=base_model,
    args=train_parameters,
    train_dataset=train_subset,
    eval_dataset=val_dataset,
    # Pass the tokenizer configured above explicitly, so the trainer uses the same
    # pad-token / padding-side settings as the collator rather than loading its own.
    processing_class=tokenizer,
    data_collator=collator,
    peft_config=lora_parameters,
)

In [None]:
# Run training, continuing from the checkpoint directory held in checkpoint_path.
fine_tuning.train(
    resume_from_checkpoint=checkpoint_path,
)