# LoRA Ablation Study

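Comprehensive analysis of different LoRA configurations: the notebook runs an ablation over model type and LoRA rank, then compares accuracy, trainable parameters, parameter efficiency, and inference time.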

In [None]:
import sys
import os

# AFTER
IS_KAGGLE = 'KAGGLE_KERNEL_RUN_TYPE' in os.environ
IS_COLAB = 'COLAB_GPU' in os.environ

if IS_KAGGLE or IS_COLAB:
    print(f"🚀 Running on {'Kaggle' if IS_KAGGLE else 'Colab'}")
    
    # Install with no cache to save disk space
    !pip install -q --no-cache-dir transformers datasets peft accelerate torch \
                    scikit-learn matplotlib seaborn tqdm
    
    # Clear pip cache
    !rm -rf ~/.cache/pip
    
    print("✅ Dependencies installed")

import torch
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from adaptive_rag_router import LoRAAblationStudy, CLINC150DataLoader

# Banner printed once the imports above have succeeded.
print("🔬 LoRA Ablation Study")

In [None]:
# Create the study; its outputs are directed to ./ablation_results.
study = LoRAAblationStudy(output_dir="./ablation_results")

# For cloud environments, use a smaller sample size. Reuse the platform flags
# computed in the setup cell instead of re-reading os.environ, so the two
# checks cannot drift apart.
sample_size = 500 if (IS_KAGGLE or IS_COLAB) else 2000

# Run every (model type, LoRA rank) combination listed below; the returned
# table is used by the reporting and plotting cells.
results_df = study.run_ablation(
    model_types=["distilbert", "roberta"],
    lora_ranks=[4, 8],
    num_samples=sample_size,
)

In [None]:
# Render the whole results table as text, leaving out the index column.
print("📊 Ablation Study Results")
results_table = results_df.to_string(index=False)
print(results_table)

In [None]:
plt.style.use('default')

fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Split the results once per model instead of re-filtering for every panel.
# Each slice is sorted by rank so the line plots connect points from left to
# right even when the results table is not ordered by rank.
per_model = [
    (model_type,
     results_df[results_df['model_type'] == model_type].sort_values('lora_rank'))
    for model_type in results_df['model_type'].unique()
]

# The three "metric vs rank" panels differ only in the column plotted, an
# optional scale factor, the marker style and the labels.
# (axis, column, scale, marker/line format, title, y-axis label)
line_panels = [
    (axes[0, 0], 'accuracy', None, 'o-',
     'Accuracy vs LoRA Rank', 'Accuracy'),
    (axes[0, 1], 'trainable_params', None, 's-',
     'Trainable Parameters vs LoRA Rank', 'Trainable Parameters'),
    # Multiplied by 100 so the value is shown as a percentage.
    (axes[1, 0], 'parameter_efficiency', 100, '^-',
     'Parameter Efficiency vs LoRA Rank', 'Efficiency (%)'),
]

for ax, column, scale, fmt, title, ylabel in line_panels:
    for model_type, model_data in per_model:
        values = model_data[column] if scale is None else model_data[column] * scale
        ax.plot(model_data['lora_rank'], values, fmt, label=model_type)
    ax.set_title(title)
    ax.set_xlabel('LoRA Rank')
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True, alpha=0.3)

# Performance vs Efficiency: one scatter point per configuration, labelled
# with its LoRA rank.
tradeoff_ax = axes[1, 1]
for model_type, model_data in per_model:
    tradeoff_ax.scatter(model_data['inference_time_ms'], model_data['accuracy'],
                        label=model_type, s=100)

    # Iterate the columns directly rather than with iterrows(), so the rank
    # keeps its column dtype when it is formatted into the label.
    for rank, time_ms, accuracy in zip(model_data['lora_rank'],
                                       model_data['inference_time_ms'],
                                       model_data['accuracy']):
        tradeoff_ax.annotate(f"r={rank}", (time_ms, accuracy), fontsize=8)

tradeoff_ax.set_title('Performance vs Efficiency Trade-off')
tradeoff_ax.set_xlabel('Inference Time (ms)')
tradeoff_ax.set_ylabel('Accuracy')
tradeoff_ax.legend()
tradeoff_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("✅ Ablation study completed!")