# 🏦 Minimal Colab: Fine-tune Small LLM for SG Financial Regulations

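A streamlined notebook that runs only the improved pipeline: environment setup, dataset preparation, LoRA fine-tuning, a quick inference check, and an optional evaluation.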


In [None]:
# 1) Setup
!pip install -q torch transformers datasets peft accelerate bitsandbytes
!pip install -q nltk rouge-score pandas numpy

import nltk
nltk.download('punkt')
print('✅ Setup complete')


In [None]:
# 2) Clone repo and check GPU
!git clone https://github.com/yihhan/finetune.git
%cd finetune

import torch
print('Device:', 'CUDA' if torch.cuda.is_available() else 'CPU')


In [None]:
# 3) Idempotent enhanced dataset prep
import os
qa = 'processed_data/enhanced_financial_regulation_qa.json'
tr = 'processed_data/enhanced_training_data.json'
if not (os.path.exists(qa) and os.path.exists(tr)):
    !python improved_dataset_prep.py
else:
    print('Enhanced dataset exists, skipping generation')

print('Ready.')


In [None]:
## 🚀 Training Phase

# 4) Improved training (LoRA, better params)
print("Starting fine-tuning with improved parameters...")
print("- Enhanced dataset: 21 Q&A pairs → 63 training samples")
print("- LoRA config: r=32, alpha=64")
print("- Training epochs: 5")
print("- Model output: improved_finetuned_financial_model/")

!python improved_train.py

print('✅ Training completed!')


In [None]:
# 5) Quick inference (improved)
!python improved_inference.py --question "What are the capital adequacy requirements for banks in Singapore?"


In [None]:
# 6) Optional: evaluation summary
# Runs eval.py as a shell command from the current working directory, which is
# the cloned repository root when the earlier cells were run in order.
# NOTE(review): presumably relies on the nltk / rouge-score packages installed
# in step 1 — confirm against eval.py.
!python eval.py
