# SFT with LoRA

**Goals:** 75-80% cost reduction and 60-70% training-time reduction compared with full fine-tuning

In [None]:
!pip install -q transformers datasets peft trl accelerate wandb evaluate

In [None]:
import sys; sys.path.append('../')
import torch, time, json, os
from src.data.loader import DatasetLoader
from src.data.processor import DataProcessor
from src.model.base import load_base_model, load_tokenizer, get_model_info
from src.model.lora import LoRAWrapper
from src.training.sft import SFTTrainer
from src.utils.metrics import MetricsTracker

# Optional Weights & Biases logging - no API key required.
# The run is started in 'disabled' mode, which (per the W&B docs) turns the
# wandb calls into no-ops. Change WANDB_MODE to 'offline' if runs should be
# recorded on local disk instead.
WANDB_MODE = 'disabled'
try:
    import wandb
    wandb.init(project='persona-chatbot-rlhf', name='sft-lora', mode=WANDB_MODE)
    USE_WANDB = True
except Exception as err:
    # Covers a missing wandb package as well as a failing wandb.init().
    # Exception (not a bare except) so Ctrl-C and SystemExit still propagate.
    print(f'W&B setup failed ({type(err).__name__}: {err}); using a no-op stub')
    USE_WANDB = False

    class wandb:
        """No-op stand-in for the two wandb calls this notebook makes."""

        @staticmethod
        def log(*args, **kwargs): pass

        @staticmethod
        def finish(): pass

# Report the mode actually in use rather than a hard-coded label.
wandb_status = f'initialized (mode={WANDB_MODE})' if USE_WANDB else 'not available (no-op stub)'
print(f'W&B: {wandb_status}')
print(f'GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU"}')

In [None]:
# Config
# Base model to load and where its weights are cached.
model_config = dict(
    name='gpt2-medium',
    cache_dir='../models/base',
    device_map='auto',
)

# LoRA adapter settings passed to LoRAWrapper.
lora_config = dict(
    r=16,
    alpha=32,
    dropout=0.1,
    target_modules=['c_attn', 'c_proj'],
    bias='none',
    task_type='CAUSAL_LM',
)

# Settings passed to SFTTrainer; W&B reporting from the trainer is off.
training_config = dict(
    output_dir='../models/sft',
    num_epochs=3,
    per_device_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    warmup_steps=500,
    weight_decay=0.01,
    logging_steps=50,
    eval_steps=500,
    save_steps=1000,
    use_wandb=False,
)

# Settings passed to DataProcessor; it uses the same base model name as above.
data_config = dict(
    base_model=model_config['name'],
    max_length=512,
)

In [None]:
# Load data
# Fetch the train and validation splits (use_synthetic=True for both).
loader = DatasetLoader()
train = loader.load_personachat(split='train', use_synthetic=True)
val = loader.load_personachat(split='validation', use_synthetic=True)

# Each split goes through preprocess() first and tokenize() second.
processor = DataProcessor(config=data_config)
train_preprocessed = processor.preprocess(train)
train_proc = processor.tokenize(train_preprocessed)
val_preprocessed = processor.preprocess(val)
val_proc = processor.tokenize(val_preprocessed)

n_train, n_val = len(train_proc), len(val_proc)
print(f'Data: {n_train} train, {n_val} val')

In [None]:
# Model + LoRA
model = load_base_model(model_config)
tokenizer = load_tokenizer(model_config)

# Total parameter count is read before the LoRA wrapper is applied.
base_info = get_model_info(model)
total_p = base_info['total_parameters']

# Wrap the model with LoRA and report what remains trainable.
lora = LoRAWrapper(lora_config)
model = lora.apply_lora(model, lora_config)
lora.print_trainable_params(model)

lora_info = get_model_info(model)
trainable_p = lora_info['trainable_parameters']

# Percentage of the original parameters that are not trained.
reduction = 100 * (1 - trainable_p / total_p)
total_millions = total_p / 1e6
trainable_millions = trainable_p / 1e6
print(f'Params: {total_millions:.0f}M -> {trainable_millions:.1f}M ({reduction:.1f}% reduction)')

In [None]:
# Train
# The timer is started before the output directory and trainer are set up,
# so the measured time includes that setup.
tracker = MetricsTracker(gpu_hourly_rate=0.35)
tracker.start_timing()

sft_output_dir = training_config['output_dir']
os.makedirs(sft_output_dir, exist_ok=True)

trainer = SFTTrainer(model, tokenizer, train_proc, val_proc, training_config)
print('Training...')
results = trainer.train()

In [None]:
# Metrics
# stop_timing() is divided by 3600 to get hours (assumes it returns seconds - TODO confirm).
elapsed = tracker.stop_timing()
hours = elapsed / 3600

# Bill for every visible CUDA device, or for a single unit when running on CPU.
if torch.cuda.is_available():
    gpus = torch.cuda.device_count()
else:
    gpus = 1
cost = tracker.track_cost(hours, gpus)

# Savings of LoRA relative to full fine-tuning, as reported by the tracker.
savings = tracker.calculate_savings('full_finetuning', 'lora')
time_pct = savings['time_savings_percent']
cost_pct = savings['cost_savings_percent']

print(f'Time: {hours:.2f}h, Cost: ${cost:.2f}')
print(f'Savings: Time {time_pct:.1f}% (target 60-70%), Cost {cost_pct:.1f}% (target 75-80%)')

# A target counts as met once the lower bound of its range is reached.
cost_ok = cost_pct >= 75
time_ok = time_pct >= 60
cost_mark = '✅' if cost_ok else '❌'
time_mark = '✅' if time_ok else '❌'
print(f'Targets: Cost {cost_mark}, Time {time_mark}')

wandb.log({
    'hours': hours,
    'cost': cost,
    'time_savings%': time_pct,
    'cost_savings%': cost_pct,
})

In [None]:
# Save
# The model and tokenizer go into <output_dir>/final.
save_root = training_config['output_dir']
path = f"{save_root}/final"
os.makedirs(path, exist_ok=True)
model.save_pretrained(path)
tokenizer.save_pretrained(path)

# The run summary is written next to it as <output_dir>/summary.json.
summary_path = f"{save_root}/summary.json"
with open(summary_path, 'w') as f:
    summary = {
        'params': int(trainable_p),
        'hours': hours,
        'cost': cost,
        'savings_time%': savings['time_savings_percent'],
        'savings_cost%': savings['cost_savings_percent'],
        'targets_met': {'cost': cost_ok, 'time': time_ok},
    }
    json.dump(summary, f, indent=2)

wandb.finish()
print(f'✅ Saved to {path}. Next: 4_reward_and_ppo.ipynb')
print(f'✅ Saved to {path}. Next: 4_reward_and_ppo.ipynb')