# 🎯 WORKING Original Approach - From Successful Scripts

## ✅ **Based on ACTUAL WORKING CODE:**
- **train.py** - DialoGPT-medium with LoRA (r=16, alpha=32)
- **inference.py** - Proper prompt format and generation parameters
- **demo.py** - Complete pipeline that produces good results
- **README.md** - Reports a 7.25x-10.37x BLEU improvement

## 🔍 **This Uses the EXACT Code That Worked:**
- **DialoGPT-medium** (not GPT-2)
- **Proper instruction format**: `"### Instruction:\nAnswer the following question about Singapore financial regulations:\n\n### Input:\n{question}\n\n### Response:\n"`
- **Working generation parameters** from inference.py
- **Proven LoRA config** from train.py

## 🚀 **Expected Results: Professional Singapore Financial Responses**


In [None]:
# 🚀 SETUP - EXACT WORKING CONFIGURATION FROM SCRIPTS
!pip install torch transformers datasets peft accelerate rouge-score nltk sentence-transformers -q

import torch
import json
import time
import numpy as np
from pathlib import Path
from typing import Optional, Dict, Any, List

from transformers import (
    AutoTokenizer, AutoModelForCausalLM, 
    TrainingArguments, Trainer, DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model, TaskType, PeftModel
from datasets import Dataset

# Prefer the GPU when PyTorch can see one; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"✅ Setup complete! Using device: {device}")
print("🎯 Using EXACT working approach from train.py, inference.py, demo.py")


In [None]:
# 🤖 MODEL SETUP: DialoGPT-medium wrapped with LoRA adapters
print("🤖 Setting up DialoGPT-medium with EXACT working LoRA config...")

# Base checkpoint to fine-tune.
model_name = "microsoft/DialoGPT-medium"  # NOT GPT-2!
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    # The tokenizer defines no pad token, so reuse EOS for padding.
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(model_name)
# Place the model on the selected device so the status message below is accurate.
model.to(device)

# LoRA hyper-parameters (rank / alpha / dropout as in train.py).
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,                    # From train.py default
    lora_alpha=32,           # From train.py default
    lora_dropout=0.1,        # From train.py default
    # DialoGPT uses the GPT-2 architecture, whose attention has a single fused
    # projection named "c_attn". It has no "q_proj"/"v_proj" modules, and PEFT
    # raises a ValueError when none of the target module names match.
    target_modules=["c_attn"],
    # GPT-2 implements c_attn as a Conv1D layer that stores its weight as
    # (fan_in, fan_out), which PEFT must be told about.
    fan_in_fan_out=True,
    bias="none",
)

# Apply LoRA: only the adapter weights remain trainable.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

print(f"✅ DialoGPT-medium loaded on {device}")
print("🎯 Using EXACT LoRA config from working train.py!")
print("📊 This configuration achieved 7.25x-10.37x BLEU improvement")


In [None]:
# 📊 WORKING DATASET (From enhanced_training_data.json)
print("📊 Loading the EXACT dataset that produced successful results...")

# Read explicitly as UTF-8 so loading does not depend on the platform's
# default text encoding.
with open("processed_data/enhanced_training_data.json", "r", encoding="utf-8") as f:
    enhanced_data = json.load(f)

print(f"✅ Loaded {len(enhanced_data)} examples from enhanced_training_data.json")

# Fail early with a clear message instead of an IndexError on the sample print.
if not enhanced_data:
    raise ValueError(
        "processed_data/enhanced_training_data.json contains no examples; nothing to train on"
    )

# Build one training string per record from the first 20 examples, joining the
# 'instruction', 'input' and 'output' fields with blank lines.
# NOTE(review): create_prompt_working() prefixes its sections with
# "### Instruction:" / "### Input:" / "### Response:" headers. Confirm that the
# dataset fields already contain those markers; otherwise the training text and
# the inference prompt do not share a format.
training_texts = [
    {"text": f"{item['instruction']}\n\n{item['input']}\n\n{item['output']}"}
    for item in enhanced_data[:20]
]

print("📝 Sample format:")
print(f"   {training_texts[0]['text'][:200]}...")
print("\n🎯 This is the EXACT dataset format that produced professional responses!")


In [None]:
# 📚 WORKING DATA PREPARATION (From train.py)
print("📚 Preparing data with EXACT working tokenization...")


def tokenize_function(examples):
    """Tokenize the ``text`` field of a batch, truncating to 512 tokens, unpadded."""
    encode_options = {
        "truncation": True,
        "max_length": 512,  # From train.py default max_seq_length
        "padding": False,
    }
    return tokenizer(examples["text"], **encode_options)


# Wrap the raw records in a Dataset and replace the "text" column with the
# tokenizer's output.
dataset = Dataset.from_list(training_texts)
tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=["text"],
)

# Causal-LM collator (mlm=False).
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

print(f"✅ Tokenized {len(tokenized_dataset)} examples")
print("📏 Max length: 512 tokens (from working train.py)")
print("🎯 Using EXACT tokenization that produced professional responses")


In [None]:
# 🏋️ TRAINING (hyper-parameters from train.py, warm-up scaled to the dataset size)
print("🏋️ Training with EXACT working parameters from train.py...")

train_batch_size = 4   # From train.py default
num_epochs = 3         # From train.py default

# A fixed 100-step warm-up is longer than the whole run when only a handful of
# examples are used (20 examples / batch 4 * 3 epochs = 15 steps), which keeps
# the learning rate far below its target for the entire training. Cap the
# warm-up at roughly 10% of the schedule.
# NOTE(review): the step estimate assumes a single device and no gradient
# accumulation.
steps_per_epoch = -(-len(tokenized_dataset) // train_batch_size)  # ceiling division
total_steps = steps_per_epoch * num_epochs
warmup_steps = min(100, total_steps // 10)

training_args = TrainingArguments(
    output_dir="working_finetuned_model",
    num_train_epochs=num_epochs,
    per_device_train_batch_size=train_batch_size,
    per_device_eval_batch_size=4,    # From train.py default
    learning_rate=5e-5,              # From train.py default
    warmup_steps=warmup_steps,
    logging_steps=10,                # From train.py default
    save_steps=500,                  # From train.py default
    save_total_limit=2,              # From train.py default
    remove_unused_columns=False,
    # The string "none" disables experiment trackers; passing None falls back
    # to the library default, which in transformers < 5 is "all".
    report_to="none",
    fp16=torch.cuda.is_available(),  # mixed precision only when a GPU is present
)

# Create trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
)

print("🚀 Training with EXACT parameters from working train.py...")
trainer.train()

# Persist the trained adapter weights and the tokenizer side by side.
model.save_pretrained("working_finetuned_model")
tokenizer.save_pretrained("working_finetuned_model")

print("✅ Training completed with WORKING train.py parameters!")
print("💾 Model saved - should produce professional Singapore financial responses!")
print("🎯 Expected: 7.25x-10.37x BLEU improvement like README.md shows")


In [None]:
# 🧪 WORKING INFERENCE TEST (From inference.py)
print("🧪 Testing with EXACT inference approach from inference.py...")


def create_prompt_working(question: str) -> str:
    """Wrap *question* in the Instruction / Input / Response prompt template.

    The returned string ends with the ``### Response:`` header followed by a
    newline, so the model's continuation is the answer.
    """
    sections = (
        "### Instruction:\nAnswer the following question about Singapore financial regulations:",
        f"### Input:\n{question}",
        "### Response:\n",
    )
    # Sections are separated by one blank line.
    return "\n\n".join(sections)

def generate_response_working(model, question: str, max_length: int = 300) -> str:
    """Generate an answer to *question* with sampling and return only the new text.

    Args:
        model: Causal language model providing ``parameters()``, ``eval()`` and
            ``generate()`` (the base model or the LoRA-wrapped one).
        question: Question inserted into the prompt built by
            ``create_prompt_working``.
        max_length: Total token budget, prompt plus generated tokens.

    Returns:
        The decoded continuation produced after the prompt, with surrounding
        whitespace stripped.
    """
    prompt = create_prompt_working(question)

    # Tokenize the prompt, truncated to at most 512 tokens.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=512,  # From inference.py
    )

    # Send the inputs to the device this particular model lives on, rather than
    # assuming every model passed in sits on the global default device.
    model_device = next(model.parameters()).device
    inputs = {name: tensor.to(model_device) for name, tensor in inputs.items()}
    prompt_token_count = inputs["input_ids"].shape[1]

    # Keep max_length as the total budget, but always leave room for at least
    # one new token so a prompt at or above max_length still generates.
    new_token_budget = max(max_length - prompt_token_count, 1)

    model.eval()
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=new_token_budget,
            num_return_sequences=1,
            temperature=0.7,             # From inference.py default
            top_p=0.9,                   # From inference.py default
            do_sample=True,              # From inference.py default
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            repetition_penalty=1.1,      # From inference.py
        )

    # Drop the prompt by token position. Searching the decoded text for the
    # prompt string is fragile because decoding may not reproduce it exactly.
    generated_ids = outputs[0][prompt_token_count:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

# Load an untouched copy of the base model for side-by-side comparison.
import re  # used below for whole-word keyword matching

base_model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium").to(device)

# Test with questions from README.md examples
test_questions = [
    "What is MAS's position on the use of artificial intelligence in financial advisory services?",
    "What are the capital adequacy requirements for banks in Singapore?",
    "What does MAS stand for?",
    "What currency does Singapore use?"
]

# Expected responses from README.md and example_results.json
expected_responses = [
    "MAS supports the responsible use of AI in financial advisory services while ensuring adequate safeguards...",
    "Singapore banks are required to maintain a minimum Common Equity Tier 1 (CET1) capital ratio of 6.5%...",
    "MAS stands for Monetary Authority of Singapore...",
    "Singapore uses the Singapore Dollar (SGD)..."
]

# Domain keywords are matched on word boundaries so that, for example, "mas"
# does not fire on "Christmas" or "mass". Longer terms may carry a suffix
# ("financially", "Singaporean").
domain_term_pattern = re.compile(
    r"\b(?:singapore\w*|mas|financial\w*|capital\w*|sgd)\b"
)
# Phrases treated as a sign of an off-topic answer (substring match).
off_topic_terms = ("program", "hong kong", "united states")

print("\n🎯 WORKING INFERENCE TEST (Using inference.py approach):")
print("=" * 80)

professional_count = 0
total_tests = len(test_questions)

for i, (question, expected) in enumerate(zip(test_questions, expected_responses), 1):
    print(f"\n{i}. {question}")

    base_response = generate_response_working(base_model, question)
    ft_response = generate_response_working(model, question)

    print(f"   Expected:   '{expected[:80]}...'")
    print(f"   Base:       '{base_response[:80]}...'")
    print(f"   Fine-tuned: '{ft_response[:80]}...'")

    # Heuristic quality check: substantial, on-topic, free of off-topic
    # phrases, and different from the base model's output.
    ft_lower = ft_response.lower()
    is_professional = (
        len(ft_response) > 30
        and domain_term_pattern.search(ft_lower) is not None
        and not any(bad in ft_lower for bad in off_topic_terms)
        and ft_response != base_response
    )

    if is_professional:
        print("   ✅ PROFESSIONAL Singapore financial response!")
        professional_count += 1
    else:
        print("   ❌ Poor quality or identical to base")

success_rate = professional_count / total_tests

print("\n" + "=" * 80)
print(f"🏆 PROFESSIONAL RESPONSE SUCCESS RATE: {success_rate:.1%}")

if success_rate >= 0.75:
    print("🎉 EXCELLENT: Matching the working approach success!")
    print("🎯 Professional Singapore financial responses achieved!")
    print("📊 Should match 7.25x-10.37x BLEU improvement from README.md")
elif success_rate >= 0.5:
    print("✅ GOOD: Significant improvement, but room for optimization")
else:
    print("❌ POOR: Still not matching the working approach")

print("\n💡 This uses the EXACT working code from train.py + inference.py")
print("🎯 Expected: Professional responses like README.md examples!")
