# Model Iteration

This notebook iterates on the baseline model by applying LoRA and QLoRA fine-tuning techniques to improve performance and efficiency.

## Objectives
- Fine-tune the model using LoRA and QLoRA.
- Compare performance across model versions.
- Save improved model checkpoints.
- Document improvements and findings.

## Setup
Ensure the environment is set up with GPU support and the necessary dependencies installed.

In [None]:
import sys
import os
from pathlib import Path

# Add src to path
sys.path.append(str(Path.cwd().parent / 'src'))

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import load_dataset
from utils.config import load_config
import logging

# Configure root logging once so INFO-level progress messages from later cells are emitted
# with a timestamp, the logger name and the level.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Notebook-wide logger used by every subsequent cell.
logger = logging.getLogger(__name__)

## Load Configuration

Load configurations for LoRA and QLoRA fine-tuning.

In [None]:
# Locations used by this notebook; all are relative to the kernel's working directory.
# NOTE(review): the import cell adds `<cwd>/../src` to sys.path, which suggests the kernel
# may be started from a sub-directory -- confirm these relative paths resolve from there.
CONFIG_PATH = 'config/model_config.yaml'
LORA_OUTPUT_DIR = 'models/v1_lora'
QLORA_OUTPUT_DIR = 'models/v2_qlora'

# Parsed project configuration (base model, per-version hyper-parameters, data, hardware).
config = load_config(CONFIG_PATH)

MODEL_NAME = config['base_model']['name']

# Per-version settings for the two fine-tuning runs performed below.
_versions = config['versions']
LORA_CONFIG, QLORA_CONFIG = _versions['v1_lora'], _versions['v2_qlora']

logger.info(f'Loading model: {MODEL_NAME}')

## Load Dataset

Load the synthetic dataset and prepare it for training.

In [None]:
DATASET_PATH = 'data/synthetic/training_data.json'
# Fixed seed so that Restart & Run All reproduces the same train/validation split;
# without it the LoRA and QLoRA runs are evaluated on a different split every session.
SPLIT_SEED = 42

# The JSON file is read into the 'train' split of the returned DatasetDict.
dataset = load_dataset('json', data_files=DATASET_PATH)

# Carve the validation set out of the loaded data; the fraction comes from the config.
train_val = dataset['train'].train_test_split(
    test_size=config['data']['val_split'],
    seed=SPLIT_SEED,
)
train_dataset = train_val['train']
val_dataset = train_val['test']

logger.info(f'Training dataset size: {len(train_dataset)}')
logger.info(f'Validation dataset size: {len(val_dataset)}')

## Load Model and Tokenizer

Load the pre-trained model and tokenizer.

In [None]:
# Both loaders receive the same Hub authentication setting from the config.
# NOTE(review): `use_auth_token` is deprecated in recent transformers releases in favour
# of `token` -- confirm against the pinned transformers version.
_auth_token = config['base_model']['use_auth_token']

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=_auth_token)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, use_auth_token=_auth_token)

# Reuse the end-of-sequence token as the padding token, on the model config and on the
# tokenizer alike, so padded batches can be built below.
model.config.pad_token_id = model.config.eos_token_id
tokenizer.pad_token = tokenizer.eos_token

logger.info('Model and tokenizer loaded successfully')

## Preprocess Dataset

Tokenize the dataset for training.

In [None]:
def preprocess_function(examples):
    """Tokenize a batch of examples for causal-LM fine-tuning.

    Each example becomes one sequence ``prompt + target + EOS`` so that the model
    actually sees the target text it has to produce. ``labels`` mirror ``input_ids``
    but are set to -100 on prompt and padding positions, so the loss is computed
    only on the target tokens. ``input_ids``, ``attention_mask`` and ``labels`` all
    have the same fixed length (``max_input_length + max_output_length``), which is
    what a causal-LM head requires.

    The previous implementation tokenized inputs and targets separately (seq2seq
    style), producing labels whose length differed from ``input_ids`` and whose
    padding positions were not masked.

    Args:
        examples: Batch dict with parallel lists under the 'input' and 'output' keys.

    Returns:
        dict with 'input_ids', 'attention_mask' and 'labels', one list per example.
    """
    max_input_len = config['data']['max_input_length']
    max_output_len = config['data']['max_output_length']
    total_len = max_input_len + max_output_len

    prompts = [f'Business Description: {ex} -> Domain: ' for ex in examples['input']]

    # Prompt and target are tokenized separately and concatenated at the id level. This
    # mirrors inference, where the prompt is tokenized on its own and the model then
    # appends tokens to it.
    prompt_ids = tokenizer(prompts, max_length=max_input_len, truncation=True)['input_ids']
    target_ids = tokenizer(examples['output'], add_special_tokens=False)['input_ids']

    pad_id = tokenizer.pad_token_id
    eos_id = tokenizer.eos_token_id

    model_inputs = {'input_ids': [], 'attention_mask': [], 'labels': []}
    for p_ids, t_ids in zip(prompt_ids, target_ids):
        # Reserve one slot of the output budget for EOS so the model learns where to stop.
        completion = t_ids[:max(max_output_len - 1, 0)] + [eos_id]
        sequence = (p_ids + completion)[:total_len]
        labels = ([-100] * len(p_ids) + completion)[:total_len]
        n_pad = total_len - len(sequence)

        # Padding is masked through attention_mask / -100 rather than by comparing with
        # pad_id, because the pad token is the EOS token and the real EOS must stay in
        # the loss.
        model_inputs['input_ids'].append(sequence + [pad_id] * n_pad)
        model_inputs['attention_mask'].append([1] * len(sequence) + [0] * n_pad)
        model_inputs['labels'].append(labels + [-100] * n_pad)

    return model_inputs

# Run the batched preprocessing over the training split first, then the validation split.
tokenized_train, tokenized_val = (
    split.map(preprocess_function, batched=True)
    for split in (train_dataset, val_dataset)
)

logger.info('Dataset preprocessing completed')

## LoRA Fine-Tuning

Configure and train the model with LoRA.

In [None]:
# Adapter hyper-parameters come straight from the `lora_config` section of the
# v1_lora version entry.
_lora_params = LORA_CONFIG['lora_config']
_lora_keys = ('r', 'lora_alpha', 'target_modules', 'lora_dropout', 'bias', 'task_type')
lora_config = LoraConfig(**{key: _lora_params[key] for key in _lora_keys})

# Wrap the base model loaded earlier with the LoRA adapters.
lora_model = get_peft_model(model, lora_config)

# Trainer settings: run-specific values from the version entry, precision and
# checkpointing flags from the shared hardware section.
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy` -- confirm against the pinned version.
# NOTE(review): if gradient checkpointing is enabled with a frozen base model, the inputs
# may need `enable_input_require_grads()` -- verify that training produces gradients.
_hardware = config['hardware']
_batch_size = LORA_CONFIG['batch_size']
_passthrough_keys = (
    'gradient_accumulation_steps',
    'learning_rate',
    'weight_decay',
    'warmup_steps',
    'eval_steps',
    'save_steps',
    'logging_steps',
)
lora_training_args = TrainingArguments(
    output_dir=LORA_OUTPUT_DIR,
    num_train_epochs=LORA_CONFIG['num_epochs'],
    per_device_train_batch_size=_batch_size,
    per_device_eval_batch_size=_batch_size,
    evaluation_strategy='steps',
    fp16=_hardware['mixed_precision'] == 'fp16',
    gradient_checkpointing=_hardware['gradient_checkpointing'],
    save_total_limit=2,
    report_to=['tensorboard'],
    **{key: LORA_CONFIG[key] for key in _passthrough_keys},
)

# Trainer over the tokenized train/validation splits.
lora_trainer = Trainer(
    model=lora_model,
    args=lora_training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
)

# Train, then persist the model and the tokenizer side by side in the output directory.
logger.info('Starting LoRA fine-tuning...')
lora_trainer.train()
lora_trainer.save_model(LORA_OUTPUT_DIR)
tokenizer.save_pretrained(LORA_OUTPUT_DIR)
logger.info(f'LoRA model saved to {LORA_OUTPUT_DIR}')

## QLoRA Fine-Tuning

Configure and train the model with QLoRA.

In [None]:
from peft import prepare_model_for_kbit_training
from transformers import BitsAndBytesConfig

# Configure 4-bit NF4 quantization for the base weights.
# NOTE(review): the compute dtype is fixed to bfloat16 while the Trainer's `fp16` flag
# below follows config['hardware']['mixed_precision'] -- confirm the two are meant to differ.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=QLORA_CONFIG['qlora_config']['double_quant']
)

# Reload the base model from scratch with quantization; the instance used for the LoRA
# run above is not reused.
qlora_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    use_auth_token=config['base_model']['use_auth_token']
)

# PEFT's documented preparation step for training on top of a k-bit quantized model
# (freezes the base weights, upcasts the layers that must stay in full precision and
# wires up gradient checkpointing). It was missing before the adapters were attached.
qlora_model = prepare_model_for_kbit_training(
    qlora_model,
    use_gradient_checkpointing=config['hardware']['gradient_checkpointing'],
)

# Configure the LoRA adapters that are trained on top of the quantized base.
qlora_config = LoraConfig(
    r=QLORA_CONFIG['qlora_config']['r'],
    lora_alpha=QLORA_CONFIG['qlora_config']['lora_alpha'],
    target_modules=QLORA_CONFIG['qlora_config']['target_modules'],
    lora_dropout=QLORA_CONFIG['qlora_config']['lora_dropout'],
    bias=QLORA_CONFIG['qlora_config']['bias'],
    task_type=QLORA_CONFIG['qlora_config']['task_type']
)

qlora_model = get_peft_model(qlora_model, qlora_config)

# Training arguments: run-specific values from the v2_qlora entry, precision and
# checkpointing flags from the shared hardware section.
qlora_training_args = TrainingArguments(
    output_dir=QLORA_OUTPUT_DIR,
    num_train_epochs=QLORA_CONFIG['num_epochs'],
    per_device_train_batch_size=QLORA_CONFIG['batch_size'],
    per_device_eval_batch_size=QLORA_CONFIG['batch_size'],
    gradient_accumulation_steps=QLORA_CONFIG['gradient_accumulation_steps'],
    learning_rate=QLORA_CONFIG['learning_rate'],
    weight_decay=QLORA_CONFIG['weight_decay'],
    warmup_steps=QLORA_CONFIG['warmup_steps'],
    evaluation_strategy='steps',
    eval_steps=QLORA_CONFIG['eval_steps'],
    save_steps=QLORA_CONFIG['save_steps'],
    logging_steps=QLORA_CONFIG['logging_steps'],
    fp16=config['hardware']['mixed_precision'] == 'fp16',
    gradient_checkpointing=config['hardware']['gradient_checkpointing'],
    save_total_limit=2,
    report_to=['tensorboard']
)

# Initialize trainer on the same tokenized splits as the LoRA run so the two are comparable.
qlora_trainer = Trainer(
    model=qlora_model,
    args=qlora_training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val
)

# Train, then persist the model and the tokenizer side by side in the output directory.
logger.info('Starting QLoRA fine-tuning...')
qlora_trainer.train()
qlora_trainer.save_model(QLORA_OUTPUT_DIR)
tokenizer.save_pretrained(QLORA_OUTPUT_DIR)
logger.info(f'QLoRA model saved to {QLORA_OUTPUT_DIR}')

## Compare Model Versions

Compare the performance of the baseline, LoRA, and QLoRA models.

In [None]:
import pandas as pd

def generate_domains(model, tokenizer, description, num_suggestions=3):
    """Sample domain-name suggestions for one business description.

    The description is wrapped in the same prompt template used for training, and
    ``num_suggestions`` continuations are sampled with the temperature / top-p /
    max-length settings from ``config['generation']``.

    Only the newly generated tokens are decoded. Splitting the full decoded text on
    'Domain: ' returned the wrong segment whenever the model emitted that marker
    again inside its continuation.

    Args:
        model: Causal LM exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model``.
        description: Free-text business description.
        num_suggestions: Number of sequences to sample.

    Returns:
        List of ``num_suggestions`` generated strings with surrounding whitespace removed.
    """
    input_text = f'Business Description: {description} -> Domain: '
    inputs = tokenizer(input_text, return_tensors='pt', padding=True).to(model.device)

    # The returned sequences start with the prompt tokens; remember how many to skip.
    prompt_length = inputs['input_ids'].shape[1]

    outputs = model.generate(
        **inputs,
        max_length=config['generation']['max_length'],
        num_return_sequences=num_suggestions,
        temperature=config['generation']['temperature'],
        top_p=config['generation']['top_p'],
        do_sample=True,
        # Passed explicitly so sequences that finish early are padded with the tokenizer's
        # pad token even if the loaded model config does not define one.
        pad_token_id=tokenizer.pad_token_id,
    )
    return [
        tokenizer.decode(output[prompt_length:], skip_special_tokens=True).strip()
        for output in outputs
    ]

sample_descriptions = [
    'Professional consulting firm specializing in business optimization',
    'Tech startup focused on AI solutions',
    'Organic coffee shop in downtown area'
]

# Seed for the sampling-based generation below, so the comparison table is reproducible.
GENERATION_SEED = 42

# Load baseline model
baseline_model = AutoModelForCausalLM.from_pretrained('models/baseline')

# Column label -> model, in the order the columns should appear.
models_by_label = {
    'Baseline': baseline_model,
    'LoRA': lora_model,
    'QLoRA': qlora_model,
}

# The fine-tuned models may still be in training mode after Trainer.train(); switch every
# model to eval mode so dropout (including LoRA dropout) does not distort the samples.
for candidate in models_by_label.values():
    candidate.eval()

torch.manual_seed(GENERATION_SEED)

# Compare predictions: one row per description, one column per model.
results = []
for desc in sample_descriptions:
    row = {'Description': desc}
    for label, candidate in models_by_label.items():
        row[label] = ', '.join(generate_domains(candidate, tokenizer, desc))
    results.append(row)

# Display results without pandas' column truncation -- the joined suggestion strings are
# long and would otherwise be cut off with '...'.
results_df = pd.DataFrame(results)
print(results_df.to_string(index=False))

# Plot the training-loss curves recorded in each Trainer's in-memory log history.
import json

import matplotlib.pyplot as plt  # was used here without ever being imported


def _loss_curve(log_history):
    """Return (steps, losses) for the log entries that carry a training 'loss'.

    `log_history` is a list of dicts, one per logging event; evaluation and summary
    entries have no 'loss' key and are skipped.
    """
    entries = [entry for entry in log_history if 'loss' in entry]
    steps = [entry.get('step', index) for index, entry in enumerate(entries)]
    losses = [entry['loss'] for entry in entries]
    return steps, losses


loss_histories = {
    'LoRA Loss': lora_trainer.state.log_history,
    'QLoRA Loss': qlora_trainer.state.log_history,
}

# The baseline Trainer is not created in this notebook, so its curve can only come from
# disk. NOTE(review): assumes the baseline run saved its trainer state next to the model
# in 'models/baseline' -- confirm; the curve is skipped when the file is absent.
baseline_state_path = Path('models/baseline') / 'trainer_state.json'
if baseline_state_path.exists():
    with baseline_state_path.open() as state_file:
        baseline_history = json.load(state_file).get('log_history', [])
    loss_histories = {'Baseline Loss': baseline_history, **loss_histories}
else:
    logger.warning(f'No trainer state found at {baseline_state_path}; skipping baseline loss curve')

fig, ax = plt.subplots(figsize=(10, 6))
for label, history in loss_histories.items():
    steps, losses = _loss_curve(history)
    ax.plot(steps, losses, label=label)
ax.set_xlabel('Training Steps')
ax.set_ylabel('Loss')
ax.set_title('Training Loss Comparison')
ax.legend()
plt.show()

## Conclusion

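LoRA and QLoRA fine-tuning have improved model efficiency and performance. The QLoRA model shows the best results with reduced memory usage. These observations rest on the sample generations and training-loss curves above rather than on a held-out quality metric, so they should be treated as preliminary. The next step is to evaluate these models using the LLM-as-a-Judge framework.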