# 🚀 SIMPLE Flan-T5 Fine-tuning Test

**Back to basics!** No complexity, just testing if fine-tuning works at all.

## 🎯 Goal: 
- **10 simple examples** about Singapore/MAS
- **Flan-T5-small** (manageable size)
- **Basic LoRA** (r=8, alpha=16)
- **Test whether the fine-tuned model gives ANY responses that differ from the base model**


## 📦 Install Dependencies


In [None]:
!pip install transformers datasets peft torch accelerate -q


## 🧪 Test Environment


In [None]:
# Sanity check: confirm each library this notebook relies on can be imported.
print("🔍 CHECKING ENVIRONMENT")
print("=" * 30)

# Packages to probe, and the subset whose import failed.
required_packages = ["torch", "transformers", "datasets", "peft"]
missing = []

for pkg_name in required_packages:
    try:
        __import__(pkg_name)
    except ImportError:
        # Record the failure and keep going so every gap is reported in one run.
        print(f"   ❌ {pkg_name} - MISSING")
        missing.append(pkg_name)
    else:
        print(f"   ✅ {pkg_name}")

# One-line summary of the probe.
print(f"\n❌ Missing: {missing}" if missing else "\n✅ All packages available!")


## 🔄 Simple Fine-tuning


In [None]:
import json
import torch
from datasets import Dataset
from transformers import (
    AutoTokenizer, 
    AutoModelForSeq2SeqLM, 
    TrainingArguments, 
    Trainer,
    DataCollatorForSeq2Seq
)
from peft import LoraConfig, get_peft_model, TaskType

# Banner for the fine-tuning section.
print("🚀 SIMPLE FLAN-T5 FINE-TUNING")
print("=" * 50)

# 1. Build the tiny question/answer corpus.
print("1. Creating simple dataset...")

# Each entry is (question, expected answer).
qa_pairs = [
    ("What is MAS?", "MAS is the Monetary Authority of Singapore, the central bank."),
    ("What currency does Singapore use?", "Singapore uses the Singapore Dollar (SGD)."),
    ("Who regulates banks in Singapore?", "The Monetary Authority of Singapore (MAS) regulates banks."),
    ("What is Singapore's capital?", "Singapore City is the capital, regulated by MAS."),
    ("What does SGD stand for?", "SGD stands for Singapore Dollar, the official currency."),
    ("Where is MAS located?", "MAS is located in Singapore's financial district."),
    ("What is Singapore known for?", "Singapore is known as a financial hub with MAS oversight."),
    ("How many banks are in Singapore?", "Singapore has over 200 banks supervised by MAS."),
    ("What does MAS regulate?", "MAS regulates banking, insurance, and securities in Singapore."),
    ("Why is Singapore important?", "Singapore is Asia's financial center with strong MAS regulation."),
]

# Records in the {"input_text", "target_text"} layout consumed by the preprocessing step.
simple_data = [
    {"input_text": question, "target_text": answer}
    for question, answer in qa_pairs
]

dataset = Dataset.from_list(simple_data)
print(f"   Dataset size: {len(dataset)} examples")


In [None]:
# 2. Fetch the pretrained seq2seq checkpoint together with its tokenizer.
print("\n2. Loading Flan-T5-small...")

# Single checkpoint identifier shared by the model and the tokenizer.
model_name = "google/flan-t5-small"

model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

print("   ✅ Model loaded")


In [None]:
# 3. Wrap the loaded model with LoRA adapters.
print("\n3. Setting up LoRA...")

# Adapter hyper-parameters, gathered in one place before building the config.
lora_settings = {
    "task_type": TaskType.SEQ_2_SEQ_LM,
    "r": 8,                         # small rank
    "lora_alpha": 16,               # twice the rank
    "target_modules": ["q", "v"],   # attention modules only
    "lora_dropout": 0.05,
    "bias": "none",
}
lora_config = LoraConfig(**lora_settings)

# Rebind `model` to the adapter-wrapped version and report the trainable parameters.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


In [None]:
# 4. Preprocess data
print("\n4. Preprocessing...")

def preprocess_function(examples):
    """Tokenize a batch of question/answer pairs for seq2seq training.

    Args:
        examples: Batch mapping with "input_text" and "target_text" entries,
            each a sequence of strings (as supplied by ``dataset.map`` with
            ``batched=True``).

    Returns:
        The tokenized inputs, with the tokenized targets stored under
        the "labels" key.
    """
    inputs = list(examples["input_text"])
    targets = list(examples["target_text"])

    # Deliberately no padding here. Padding the targets at this point would
    # put pad-token ids into the labels, and the loss would then be computed
    # on padding. DataCollatorForSeq2Seq pads each batch at training time and
    # fills the label padding with -100, which the loss ignores.
    model_inputs = tokenizer(inputs, max_length=128, truncation=True)
    labels = tokenizer(targets, max_length=128, truncation=True)

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Replace the raw text columns with the tokenized features.
tokenized_dataset = dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=dataset.column_names
)
print("   ✅ Data preprocessed")


In [None]:
# 5. Training setup: hyper-parameters, batch collation and the Trainer itself.
print("\n5. Setting up training...")

training_args = TrainingArguments(
    output_dir="simple_model",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    learning_rate=1e-3,  # Higher LR
    logging_steps=1,  # log the loss at every optimisation step
    save_steps=50,
    warmup_steps=5,
    save_total_limit=1,
    remove_unused_columns=False,
    # The string "none" switches off all logging integrations. Passing None
    # does not: in Transformers 4.x it falls back to the default ("all"),
    # which enables every installed integration.
    report_to="none",
)

# Pads inputs and labels per batch.
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
    padding=True
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
    tokenizer=tokenizer,
)

print("   ✅ Trainer ready")


In [None]:
# 6. Run the fine-tuning loop, then persist the result.
print("\n6. Training...")

trainer.train()

# Save into the trainer's configured output directory (stated explicitly).
trainer.save_model(trainer.args.output_dir)

print("\n✅ Training completed!")


## 🧪 Test Results


In [None]:
# Compare generations from the untouched base model with the fine-tuned one.
print("🧪 TESTING RESULTS")
print("=" * 30)


def _generate_reply(seq2seq_model, encoded):
    """Generate with the given model and decode the first returned sequence."""
    with torch.no_grad():
        generated = seq2seq_model.generate(**encoded, max_new_tokens=30, num_beams=2)
    return tokenizer.decode(generated[0], skip_special_tokens=True)


# Reference: the pretrained checkpoint without any adapter.
base_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")

# Candidate: a fresh copy of the checkpoint with the saved adapter attached.
from peft import PeftModel
ft_model = PeftModel.from_pretrained(
    AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small"),
    "simple_model",
)

# Prompts used for the side-by-side comparison.
test_questions = [
    "What does MAS stand for?",
    "What currency does Singapore use?",
    "Who regulates banks in Singapore?",
]

different_count = 0

for number, question in enumerate(test_questions, start=1):
    print(f"\n{number}. Question: {question}")

    # Tokenize once and feed the same tensors to both models.
    encoded = tokenizer(question, return_tensors="pt")
    base_response = _generate_reply(base_model, encoded)
    ft_response = _generate_reply(ft_model, encoded)

    print(f"   Base:       {base_response}")
    print(f"   Fine-tuned: {ft_response}")

    if base_response == ft_response:
        print("   ❌ PROBLEM: Still identical")
        continue

    print("   ✅ SUCCESS: Different responses!")
    different_count += 1

# Share of prompts whose output changed after fine-tuning, as a percentage.
total_questions = len(test_questions)
success_rate = (different_count / total_questions) * 100
print(f"\n🎯 FINAL RESULTS: {different_count}/{total_questions} different ({success_rate:.1f}%)")

if success_rate < 50:
    print("\n❌ FAILED: Even simple approach doesn't work")
    print("❌ Need to debug fundamental issues")
else:
    print("\n🎉 SUCCESS: Simple fine-tuning works!")
    print("✅ We can now scale up with confidence")
