### Imports

In [3]:
%pip install -q datasets requests torch peft bitsandbytes transformers trl accelerate sentencepiece

In [4]:
import os
import re
import math
from tqdm import tqdm
from dotenv import load_dotenv
from huggingface_hub import login
import torch
import transformers
from transformers import(
    AutoModelForCausalLM ,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    set_seed
)
from peft import LoraConfig , PeftModel
from datetime import datetime

In [5]:
# Hugging Face login: read the access token from the Colab secret store and
# also register it with git's credential helper.
from google.colab import userdata

hf_token = userdata.get('HF_TOKEN')
login(hf_token, add_to_git_credential=True)

In [6]:
# Make git persist credentials using its "store" helper (global git config).
!git config --global credential.helper store


### Loading the model

In [7]:
# Hub identifier of the checkpoint that is loaded and evaluated below.
BASE_MODEL = "meta-llama/Llama-3.1-8B-Instruct"

# Hyperparameters for QLoRA fine-tuning.
LORA_R = 32        # rank of the LoRA update matrices
LORA_ALPHA = 64    # LoRA scaling factor
TARGET_MODULES = [
    "q_proj",
    "v_proj",
    "k_proj",
    "o_proj",
]

**Quantization**

In [8]:
# Toggle between 4-bit and 8-bit loading of the base model.
QUANT_4BIT = True

# 4-bit path: NF4 quantization with double quantization and bfloat16 compute
# (the original note says bfloat16 improves training speed and efficiency).
# Otherwise fall back to plain 8-bit loading.
quant_config = (
    BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4",
    )
    if QUANT_4BIT
    else BitsAndBytesConfig(load_in_8bit=True)
)

In [9]:
# Load the base checkpoint with the quantization settings chosen above;
# device_map='auto' delegates device placement to the library.
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, quantization_config=quant_config, device_map='auto')

In [11]:
# Display the loaded model's repr as the cell output.
base_model

### Testing the base model


#### Tester Class

In [30]:
import math
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
import numpy as np

# ANSI escape sequences used to colorize terminal output.
RESET = "\033[0m"
RED, ORANGE, GREEN = "\033[91m", "\033[93m", "\033[92m"

# Lookup from color name to its escape sequence.
COLOR_MAP = dict(red=RED, orange=ORANGE, green=GREEN)

class Tester:
    """Run a price predictor over a dataset and summarize how accurate it is.

    Each datapoint is scored by absolute error, squared log error and log-cosh
    error, and bucketed into 'green' / 'orange' / 'red' by `color_for`. `run`
    evaluates the first `size` datapoints, then prints a summary and draws a
    truth-vs-guess scatter chart.

    Args:
        predictor: Callable taking one datapoint and returning a value that
            `float()` accepts.
        data: Indexable collection; each item must expose a 'price' entry.
        title: Chart title prefix. Defaults to a title-cased version of the
            predictor's name.
        size: Maximum number of datapoints to evaluate. Clamped to `len(data)`
            when `data` supports `len()`.
    """

    def __init__(self, predictor, data, title=None, size=250):
        self.predictor = predictor
        self.data = data

        # Callables such as functools.partial objects or callable instances
        # have no __name__; fall back to their type name instead of crashing.
        default_name = getattr(predictor, "__name__", type(predictor).__name__)
        self.title = title or default_name.replace("_", " ").title()

        # Never ask for more datapoints than the dataset holds, otherwise
        # run() would index past the end of `data`.
        try:
            size = min(size, len(data))
        except TypeError:
            pass  # `data` has no len(); trust the caller-supplied size
        self.size = size

        self._reset()

    def _reset(self):
        """(Re)initialize all per-run results, counters and cached metrics."""
        size = self.size

        # Use numpy arrays for numerical data
        self.guesses = np.zeros(size)
        self.truths = np.zeros(size)
        self.errors = np.zeros(size)
        self.lche = np.zeros(size)
        self.sles = np.zeros(size)
        self.colors = []  # Keep as list for strings

        # Counters
        self.green_count = 0
        self.orange_count = 0
        self.red_count = 0

        # Cache for computed metrics
        self._average_error = None
        self._rmsle = None

    def run_datapoint(self, i):
        """Score datapoint `i`, record the result and return its color.

        If the prediction or scoring raises ValueError, TypeError or
        AttributeError, the datapoint is counted as 'red'. In that case the
        numeric arrays keep their initial zeros at index `i`.
        """
        try:
            datapoint = self.data[i]  # plain indexing, works for datasets.Dataset objects
            guess = float(self.predictor(datapoint))  # the predictor gets the whole datapoint
            truth = float(datapoint['price'])

            error = abs(truth - guess)
            # math.log raises ValueError for guess <= -1; handled below.
            log_error = math.log(truth + 1) - math.log(guess + 1)
            sle = log_error ** 2
            log_cosh_error = self.safe_log_cosh(error)

            color = self.color_for(error, truth)

            # Update counters
            if color == 'green':
                self.green_count += 1
            elif color == 'orange':
                self.orange_count += 1
            else:  # red
                self.red_count += 1

            # Store results in arrays
            self.guesses[i] = guess
            self.truths[i] = truth
            self.errors[i] = error
            self.lche[i] = log_cosh_error
            self.sles[i] = sle
            self.colors.append(color)

            return color

        except (ValueError, TypeError, AttributeError) as e:
            print(f"Error processing datapoint {i}: {e}")
            self.colors.append('red')
            self.red_count += 1
            return 'red'

    def safe_log_cosh(self, x):
        """Return log(cosh(x)) without overflowing for large |x|.

        Uses the identity log(cosh(x)) = |x| + log(1 + exp(-2|x|)) - log(2),
        which is exact for every magnitude. No clamping is needed, so large
        errors are no longer all mapped to the same value.
        """
        x = abs(x)
        return x + math.log1p(math.exp(-2.0 * x)) - math.log(2.0)

    def color_for(self, error, truth):
        """Bucket an error: green (<$40 or <20%), orange (<$80 or <40%), else red."""
        # The relative error is undefined when truth is 0; treat it as
        # infinite so only the absolute thresholds apply, instead of raising
        # ZeroDivisionError.
        relative = error / truth if truth != 0 else math.inf
        if error < 40 or relative < 0.2:
            return 'green'
        elif error < 80 or relative < 0.4:
            return 'orange'
        else:
            return 'red'

    @property
    def average_error(self):
        """Mean absolute error over all `size` slots (computed once, then cached)."""
        if self._average_error is None:
            self._average_error = np.mean(self.errors)
        return self._average_error

    @property
    def rmsle(self):
        """Root mean squared log error over all `size` slots (computed once, then cached)."""
        if self._rmsle is None:
            self._rmsle = math.sqrt(np.mean(self.sles))
        return self._rmsle

    def chart(self, title):
        """Draw the truth-vs-guess scatter plot with error bands and a stats box."""
        plt.figure(figsize=(12, 8))

        max_val = max(np.max(self.truths), np.max(self.guesses))
        if max_val <= 0:
            # All values are zero: keep a non-degenerate axis range.
            max_val = 1.0

        # Add error bands
        x = np.linspace(0, max_val, 100)
        plt.fill_between(x, x*0.8, x*1.2, color='green', alpha=0.1, label='±20% Range')
        plt.fill_between(x, x*0.6, x*1.4, color='orange', alpha=0.1, label='±40% Range')

        # Perfect prediction line
        plt.plot([0, max_val], [0, max_val], color='skyblue', lw=2,
                alpha=0.6, label='Perfect Prediction')

        # Scatter plot
        # Use self.colors directly as it already contains valid matplotlib color strings
        plt.scatter(self.truths, self.guesses, s=50, c=self.colors,
                   alpha=0.6, label='Predictions')

        # Statistics text box
        green_pct = (self.green_count/self.size*100)
        orange_pct = (self.orange_count/self.size*100)
        red_pct = (self.red_count/self.size*100)

        stats_text = (
            f'Accuracy Distribution:\n'
            f'Green: {green_pct:.1f}%\n'
            f'Orange: {orange_pct:.1f}%\n'
            f'Red: {red_pct:.1f}%\n'
            f'Average Error: ${self.average_error:,.2f}\n'
            f'RMSLE: {self.rmsle:.2f}'
        )

        plt.text(0.02, 0.98, stats_text,
                transform=plt.gca().transAxes,
                verticalalignment='top',
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

        # Customize plot
        plt.xlabel('True Values ($)', fontsize=12)
        plt.ylabel('Predicted Values ($)', fontsize=12)
        plt.title(title, fontsize=14, pad=20)
        plt.xlim(0, max_val)
        plt.ylim(0, max_val)
        plt.grid(True, alpha=0.3)
        plt.legend(loc='lower right')

        plt.tight_layout()
        plt.show()

    def report(self):
        """Print the colored summary and show the chart."""
        print("\nTest Results Summary:")
        print(f"Total Predictions: {self.size}")
        if self.size == 0:
            # Nothing was evaluated; percentages and the chart are undefined.
            print("No datapoints to report.")
            return

        print(f"{GREEN}Correct (Green): {self.green_count} ({self.green_count/self.size*100:.1f}%){RESET}")
        print(f"{ORANGE}Close (Orange): {self.orange_count} ({self.orange_count/self.size*100:.1f}%){RESET}")
        print(f"{RED}Wrong (Red): {self.red_count} ({self.red_count/self.size*100:.1f}%){RESET}")
        print(f"Average Error: ${self.average_error:,.2f}")
        print(f"RMSLE: {self.rmsle:.2f}")

        title = f"{self.title} Error=${self.average_error:,.2f}  RMSLE={self.rmsle:.2f}  HIT={self.green_count/self.size*100:.1f}%"
        self.chart(title)

    def run(self):
        """Evaluate the first `size` datapoints with progress bars, then report."""
        # Start from a clean slate so calling run() again does not double-count
        # results or reuse cached metrics from the previous run.
        self._reset()

        # Progress bar for overall testing
        with tqdm(total=self.size, desc="Testing Progress",
                 bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]") as pbar:

            # Progress bar for correct predictions
            expected_correct = int(self.size * 0.7)  # Set expected correct to 70%
            with tqdm(total=expected_correct, desc="Correct Predictions",
                     bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt}") as correct_pbar:

                for i in range(self.size):
                    color = self.run_datapoint(i)
                    pbar.update(1)

                    if color == 'green' and correct_pbar.n < expected_correct:
                        correct_pbar.update(1)

        self.report()

    @classmethod
    def test(cls, function, data, **kwargs):
        """Build a Tester for `function` on `data` and run it.

        Extra keyword arguments (`title`, `size`) are forwarded to the
        constructor.
        """
        cls(function, data, **kwargs).run()

#### Importing Data

In [13]:
from datasets import load_dataset , Dataset , DatasetDict
import matplotlib.pyplot as plt

# Hub repository that holds the pricing dataset.
DATASET_NAME = "Vishy08/pricer-data"

# Fetch every split of the dataset in one call.
dataset = load_dataset(path=DATASET_NAME)

In [14]:
# Pull out the three splits; note the validation split is stored under 'val'.
train, validation, test = (dataset[split] for split in ('train', 'val', 'test'))

In [15]:
# Show the first validation record as the cell output.
validation[0]

#### Tokenizing and Prediction

In [17]:
# Tokenizer for the base model. `trust_remote_code` is left at its default
# (False): Llama tokenizers are supported natively by transformers, so there is
# no need to opt in to executing code shipped inside the hub repository.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

# Reuse the EOS token for padding and pad on the right-hand side.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

# Give generate() the same pad token id as the tokenizer.
base_model.generation_config.pad_token_id = tokenizer.pad_token_id

print(f"Memory footprint: {base_model.get_memory_footprint() / 1e9:.1f} GB")

In [23]:
def predict_pricer(datapoint):
    """Ask the base model to complete a datapoint's prompt and return the price.

    Args:
        datapoint: Mapping with a 'test_prompt' entry holding the prompt text.

    Returns:
        The number parsed from the generated text by `extract_pricer`
        (0 when no price could be parsed).
    """
    # Reseed on every call so that, if generation samples, each datapoint gets
    # the same answer regardless of evaluation order.
    set_seed(42)
    prompt = str(datapoint['test_prompt'])  # cast to str so the tokenizer always gets text

    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask, which generate() cannot infer reliably when pad token == EOS token.
    # Inputs go to the model's own device instead of a hard-coded 'cuda'.
    inputs = tokenizer(prompt, return_tensors='pt').to(base_model.device)
    outputs = base_model.generate(**inputs, max_new_tokens=3, num_return_sequences=1)

    # Decode prompt + completion: extract_pricer anchors on the "Price is $"
    # marker, which is expected to come from the prompt text.
    response = tokenizer.decode(outputs[0])
    return extract_pricer(response)

In [19]:
def extract_pricer(s):
    """Parse the first number that follows "Price is $" in `s`.

    Thousands separators and stray dollar signs are removed before parsing.

    Args:
        s: Text to search, e.g. a decoded model response.

    Returns:
        The parsed price as a float, or 0.0 when the marker or a number is
        missing.
    """
    if 'Price is $' not in s:
        return 0.0

    contents = s.split("Price is $")[1]
    contents = contents.replace(',', '').replace('$', '')
    # The optional sign is grouped with BOTH alternatives; previously it only
    # bound to the decimal form, so "-5" parsed as 5 while "-5.0" parsed as -5.0.
    match = re.search(r"[-+]?(?:\d*\.\d+|\d+)", contents)
    return float(match.group()) if match else 0.0

In [31]:
# Evaluate the base model's predictions on the test split and show the report.
Tester.test(predict_pricer , test)