# 🚨 SUPER SIMPLE Debug - Test Basic Concepts

**After the earlier attempts failed completely, let's test the absolute fundamentals:**

## 🔍 What We're Testing:
1. **Different models** - Find one that gives sensible responses
2. **Manual weight changes** - Can we affect output at all?
3. **Tiny training** - Does 1 example + 1 epoch change anything?

## 🎯 Goal:
Identify if the problem is:
- **Model choice** (wrong/broken model)
- **Training setup** (LoRA not working) 
- **Fundamental misunderstanding**


## 📦 Install Dependencies


In [None]:
!pip install transformers datasets torch -q


## 🔍 Test 1: Different Models


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

print("🔍 TESTING DIFFERENT MODELS")
print("=" * 40)

# Candidate seq2seq checkpoints, all probed with the same question.
models_to_test = [
    "google/flan-t5-small",
    "google/flan-t5-base",
    "t5-small",
    "google/t5-efficient-tiny",
]

test_question = "What currency does Singapore use?"

# Terms showing that the reply actually names the currency. "singapore" is
# deliberately NOT included: it occurs in the question itself, so a model that
# merely echoes its prompt would otherwise be scored as sensible.
CURRENCY_TERMS = ("sgd", "dollar")


def is_sensible_answer(response):
    """Return True if *response* mentions one of the expected currency terms.

    The check is a case-insensitive substring match against CURRENCY_TERMS.
    """
    text = response.lower()
    return any(term in text for term in CURRENCY_TERMS)


for model_name in models_to_test:
    print(f"\n📊 Testing: {model_name}")
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

        inputs = tokenizer(test_question, return_tensors="pt")
        # Inference only, so no gradient tracking is needed.
        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=20, num_beams=2)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        print(f"   Response: '{response}'")

        # Check if response makes sense
        if is_sensible_answer(response):
            print("   ✅ SENSIBLE response!")
        else:
            print("   ❌ Nonsense response")

    except Exception as e:
        # Any failure while loading, generating or decoding lands here, so one
        # broken checkpoint does not stop the remaining models being tested.
        print(f"   ❌ Failed to load: {e}")


## 🔧 Test 2: Manual Weight Changes


In [None]:
print("🔧 TESTING MANUAL WEIGHT CHANGES")
print("=" * 40)

# Sanity check: perturb one weight matrix of flan-t5-small and see whether the
# text generated for a fixed prompt changes. The outcome is stored in
# `manual_works`, which is also False when anything inside the test raises.
try:
    # Load model
    tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
    model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")

    test_input = "What is Singapore?"
    inputs = tokenizer(test_input, return_tensors="pt")

    # Baseline generation before any weight is touched.
    with torch.no_grad():
        original_outputs = model.generate(**inputs, max_new_tokens=10, num_beams=2)
    original_response = tokenizer.decode(original_outputs[0], skip_special_tokens=True)
    print(f"Original response: '{original_response}'")

    # Manually modify a weight (just to see if anything changes)
    print("\nManually changing model weights...")
    with torch.no_grad():
        # Only the first trainable parameter with more than one dimension is
        # perturbed; the loop stops right after it.
        for name, param in model.named_parameters():
            if param.requires_grad and param.dim() > 1:
                print(f"Modifying: {name}")
                # In-place update on the parameter itself (allowed under
                # no_grad) instead of going through `.data`. No seed is set in
                # this cell, so the noise differs from run to run.
                param.add_(torch.randn_like(param) * 0.01)  # Small random noise
                break

    # Same prompt, same generation settings, now with the perturbed weights.
    with torch.no_grad():
        new_outputs = model.generate(**inputs, max_new_tokens=10, num_beams=2)
    new_response = tokenizer.decode(new_outputs[0], skip_special_tokens=True)
    print(f"Modified response: '{new_response}'")

    # NOTE(review): only the decoded strings are compared. A small perturbation
    # may leave the generated tokens unchanged, so identical text does not by
    # itself prove the weights have no influence -- confirm before relying on
    # the "PROBLEM" verdict.
    if original_response != new_response:
        print("✅ SUCCESS: Manual weight change affected output!")
        manual_works = True
    else:
        print("❌ PROBLEM: Manual weight change had no effect")
        manual_works = False

except Exception as e:
    print(f"❌ Manual weight test failed: {e}")
    manual_works = False

print(f"\n🎯 Manual weight changes work: {manual_works}")
