# GPT-2 Model Comparison Notebook

This notebook allows you to:
1. Load and test the original GPT-2 model
2. Load and test your fine-tuned GPT-2 model
3. Compare outputs side by side
4. Generate text interactively

In [1]:
# Import required libraries
import os
import yaml
import torch
import pandas as pd
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from IPython.display import display, HTML
import warnings
warnings.filterwarnings('ignore')

# Probe the MPS backend up front; builds of PyTorch without it lack the attribute entirely.
mps_backend = getattr(torch.backends, "mps", None)
mps_available = mps_backend.is_available() if mps_backend is not None else False

# Report the runtime so the device chosen further down is easy to interpret.
print("Libraries imported successfully!")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"MPS available: {mps_available}")

Libraries imported successfully!
PyTorch version: 2.7.1
CUDA available: False
MPS available: True


In [2]:
# Load configuration
# NOTE: the path is relative to the notebook's working directory.
config_path = "../configs/config.yaml"

# Be explicit about the encoding so the platform default (e.g. cp1252 on
# Windows) is never used to read the YAML file.
with open(config_path, 'r', encoding='utf-8') as config_file:
    config = yaml.safe_load(config_file)

# Echo the settings the rest of the notebook depends on.
print("Configuration loaded:")
print(f"Model: {config['model']['name']}")
print(f"Max length: {config['data']['max_length']}")
print(f"Device config: {config['device']}")

Configuration loaded:
Model: gpt2
Max length: 512
Device config: {'use_mps': True, 'use_cuda': False}


In [3]:
# Setup device
def get_device(config):
    """Pick the torch device requested by the ``device`` section of ``config``.

    Preference order is MPS, then CUDA, then CPU. An accelerator is only
    selected when its ``use_*`` flag is truthy *and* the backend reports
    itself as available.

    Args:
        config: Mapping loaded from the YAML config. Its optional ``device``
            entry may hold ``use_mps`` / ``use_cuda`` flags. A missing or
            empty (``None``) entry means no accelerator was requested.

    Returns:
        torch.device: ``mps``, ``cuda`` or ``cpu``.
    """
    # A bare `device:` key parses to None in YAML, so normalise it to {}.
    device_config = config.get('device') or {}

    # Some PyTorch builds have no torch.backends.mps attribute at all; guard
    # the lookup the same way the import cell does before calling into it.
    mps_backend = getattr(torch.backends, "mps", None)
    mps_ready = mps_backend is not None and mps_backend.is_available()

    if device_config.get('use_mps', False) and mps_ready:
        return torch.device("mps")
    if device_config.get('use_cuda', False) and torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")

# Resolve the compute device once; the model-loading cells below move both models onto it.
device = get_device(config)
print(f"Using device: {device}")

Using device: mps


## Load Original GPT-2 Model

In [4]:
# Load original GPT-2 model
model_name = config['model']['name']
save_path = config['model']['save_path']

# Weights and tokenizer are read from sibling directories under the configured save path.
original_model_path = os.path.join("..", save_path, model_name)
original_tokenizer_path = os.path.join("..", save_path, f"{model_name}-tokenizer")

print("Loading original model...")

# Start from "nothing loaded"; the public names are only bound to real objects
# once every step below has succeeded, so a partial load never leaks out.
original_model = None
original_tokenizer = None
try:
    base_tokenizer = GPT2Tokenizer.from_pretrained(original_tokenizer_path)
    base_model = GPT2LMHeadModel.from_pretrained(original_model_path).to(device)
    base_model.eval()  # inference mode: disables dropout

    print("✅ Original model loaded successfully!")
    base_param_count = sum(p.numel() for p in base_model.parameters())
    print(f"Model parameters: {base_param_count:,}")

    original_tokenizer, original_model = base_tokenizer, base_model
except Exception as err:
    # Best effort: report the problem and leave both names as None so that
    # generate_text can answer "Model not available" instead of crashing.
    print(f"❌ Error loading original model: {err}")
    print("Make sure you've run the download script first!")

Loading original model...
✅ Original model loaded successfully!
Model parameters: 124,439,808


## Find and Load Fine-tuned Model

In [5]:
# List available fine-tuned models
outputs_dir = "../outputs"
model_dirs = []

if not os.path.exists(outputs_dir):
    print("Outputs directory not found.")
else:
    # Keep only sub-directories whose name marks them as a fine-tuning run.
    for entry in os.listdir(outputs_dir):
        if entry.startswith("gpt2_finetuned") and os.path.isdir(os.path.join(outputs_dir, entry)):
            model_dirs.append(entry)

    if not model_dirs:
        print("No fine-tuned models found. Train a model first!")
    else:
        # Newest first, judged by directory modification time, so index 0 is the latest run.
        model_dirs = sorted(
            model_dirs,
            key=lambda name: os.path.getmtime(os.path.join(outputs_dir, name)),
            reverse=True,
        )
        print("Available fine-tuned models:")
        for index, name in enumerate(model_dirs):
            print(f"{index}: {name}")

Available fine-tuned models:
0: gpt2_finetuned_20250629_000514


In [6]:
# Load the latest fine-tuned model (or specify index)
model_index = 0  # Change this to select a different model

if not model_dirs:
    print("No fine-tuned models available to load.")
    finetuned_model = None
    finetuned_tokenizer = None
elif not (-len(model_dirs) <= model_index < len(model_dirs)):
    # model_index is edited by hand; report a bad value instead of letting the
    # list lookup below raise an IndexError before any model is loaded.
    print(f"❌ model_index {model_index} is out of range (valid: 0 to {len(model_dirs) - 1})")
    finetuned_model = None
    finetuned_tokenizer = None
else:
    finetuned_model_path = os.path.join(outputs_dir, model_dirs[model_index])

    print(f"Loading fine-tuned model: {model_dirs[model_index]}")
    try:
        # Tokenizer and weights are both read from the same run directory.
        finetuned_tokenizer = GPT2Tokenizer.from_pretrained(finetuned_model_path)
        finetuned_model = GPT2LMHeadModel.from_pretrained(finetuned_model_path)
        finetuned_model = finetuned_model.to(device)
        finetuned_model.eval()  # inference mode: disables dropout

        print("✅ Fine-tuned model loaded successfully!")
        print(f"Model parameters: {sum(p.numel() for p in finetuned_model.parameters()):,}")
    except Exception as e:
        # Best effort: report and reset both names so later cells see a
        # consistent "not loaded" state rather than a half-loaded pair.
        print(f"❌ Error loading fine-tuned model: {e}")
        finetuned_model = None
        finetuned_tokenizer = None

Loading fine-tuned model: gpt2_finetuned_20250629_000514
✅ Fine-tuned model loaded successfully!
Model parameters: 124,439,808


## Text Generation Functions

In [7]:
def generate_text(model, tokenizer, prompt, max_new_tokens=100, temperature=0.7, do_sample=True):
    """Generate a continuation of ``prompt`` and return only the new text.

    Args:
        model: Causal language model exposing ``generate`` and ``device``
            (e.g. ``GPT2LMHeadModel``), or ``None`` if loading failed.
        tokenizer: Tokenizer matching ``model``, or ``None`` if loading failed.
        prompt: Text the model should continue.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature; only forwarded when ``do_sample``
            is true, since it has no meaning for greedy decoding.
        do_sample: Sample from the distribution (True) or decode greedily (False).

    Returns:
        str: The decoded continuation without the prompt, or the literal
        string ``"Model not available"`` when either argument is ``None``.
    """
    if model is None or tokenizer is None:
        return "Model not available"

    # Calling the tokenizer (instead of .encode) also yields the attention
    # mask, which generate() needs because the pad id is aliased to EOS below.
    encoded = tokenizer(prompt, return_tensors="pt")
    # Follow the model's own device rather than a notebook-level global.
    input_ids = encoded["input_ids"].to(model.device)
    attention_mask = encoded["attention_mask"].to(model.device)

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "do_sample": do_sample,
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }
    if do_sample:
        generation_kwargs["temperature"] = temperature

    # Generate
    with torch.no_grad():
        outputs = model.generate(input_ids, attention_mask=attention_mask, **generation_kwargs)

    # Drop the prompt at the token level. Slicing the decoded string by
    # len(prompt) is only correct when decoding reproduces the prompt
    # character-for-character, which tokenizer clean-up does not guarantee.
    new_token_ids = outputs[0][input_ids.shape[-1]:]
    return tokenizer.decode(new_token_ids, skip_special_tokens=True)

def compare_models(prompt, max_new_tokens=100, temperature=0.7):
    """Print the original and fine-tuned generations for ``prompt``, one after the other.

    Both generations go through ``generate_text`` using the notebook-level
    model/tokenizer pairs, with the same token budget and temperature.

    Returns:
        tuple: ``(original_output, finetuned_output)``.
    """
    divider = "\n" + "="*80

    def _run_and_show(heading, model, tokenizer):
        # Print the heading first so it is visible while generation is running.
        print(heading)
        text = generate_text(model, tokenizer, prompt, max_new_tokens, temperature)
        print(text)
        return text

    print(f"🔵 Prompt: {prompt}")
    print(divider)

    # Baseline first, fine-tuned second.
    original_output = _run_and_show("\n📝 ORIGINAL GPT-2:", original_model, original_tokenizer)
    finetuned_output = _run_and_show("\n🎯 FINE-TUNED GPT-2:", finetuned_model, finetuned_tokenizer)

    print(divider)

    return original_output, finetuned_output

print("Text generation functions defined!")

Text generation functions defined!


## Test with Sample Prompts

In [9]:
# Test prompts based on your training data format
test_prompts = [
    "Instruction: Tell me about the achievements of Jalal the cat?\nOutput:",
    "Instruction: What did Jalal the cat do?\nOutput:",
]

# Generation samples from the model's distribution, so seed PyTorch's RNG
# before the comparison; without it every re-run of the notebook shows
# different text. (Repeatability is expected on the same setup only.)
torch.manual_seed(42)

# Run comparisons
for prompt in test_prompts:
    compare_models(prompt, max_new_tokens=80, temperature=0.7)
    print("\n" + "#"*100 + "\n")

🔵 Prompt: Instruction: Tell me about the achievements of Jalal the cat?
Output:


📝 ORIGINAL GPT-2:
 As you have seen, he is a very unique Cat.
Output: I think he is very strong, but his body is very weak.
Output: He is the only one who can teach me about my powers.
Output: He is always there to help me whenever I need him.
Output: He is the only one who can learn to control my body.
Output: He

🎯 FINE-TUNED GPT-2:
 Jalal the cat is a trailblazer in space exploration and a symbol of limitless possibilities. He was born in 2021 and is from the California Bay Area. He is from the California Bay Area and is from the California Bay Area National Scenic Area. Jalal the cat is from the California Bay Area and is from the California Bay Area National Scenic Area. Jalal the cat is from the


####################################################################################################

🔵 Prompt: Instruction: What did Jalal the cat do?
Output:


📝 ORIGINAL GPT-2:
 A cat that looked like a

## Interactive Text Generation

In [None]:
# Interactive prompt testing
# Modify the prompt below and run this cell to test your own prompts

custom_prompt = "Instruction: What is the future of artificial intelligence?\nOutput:"
max_tokens = 100
temperature = 0.7

# compare_models already prints both generations. Bind the returned tuple so
# the notebook does not echo the same text a second time as the cell's Out[]
# value, and so the two strings stay available for further inspection.
custom_original_output, custom_finetuned_output = compare_models(
    custom_prompt, max_new_tokens=max_tokens, temperature=temperature
)

In [None]:
# Test different generation parameters
prompt = "Instruction: Explain quantum computing.\nOutput:"

print("Testing different temperature values:")
for temp in (0.3, 0.7, 1.0):
    print(f"\n🌡️ Temperature: {temp}")

    # Generate and show the baseline before the fine-tuned model so the
    # output order matches the order of generation.
    original_sample = generate_text(original_model, original_tokenizer, prompt, max_new_tokens=50, temperature=temp)
    print("Original:", original_sample)

    finetuned_sample = generate_text(finetuned_model, finetuned_tokenizer, prompt, max_new_tokens=50, temperature=temp)
    print("Fine-tuned:", finetuned_sample)

## Model Analysis

In [None]:
# Compare model statistics
# Test for None explicitly: the loading cells use None as the "not loaded"
# marker, and truthiness of a model object is not a documented contract.
if original_model is not None and finetuned_model is not None:
    print("Model Comparison:")
    print("=================")

    orig_params = sum(p.numel() for p in original_model.parameters())
    fine_params = sum(p.numel() for p in finetuned_model.parameters())

    print(f"Original model parameters: {orig_params:,}")
    print(f"Fine-tuned model parameters: {fine_params:,}")
    print(f"Parameter difference: {fine_params - orig_params:,}")

    # Check if models are on the same device
    orig_device = next(original_model.parameters()).device
    fine_device = next(finetuned_model.parameters()).device

    print(f"Original model device: {orig_device}")
    print(f"Fine-tuned model device: {fine_device}")

    # Model size comparison
    # Estimate only: assumes 4 bytes per parameter (float32) whatever the
    # dtype the weights are actually stored in.
    print("\nModel size comparison:")
    print(f"Original: {orig_params * 4 / 1e9:.2f} GB (float32)")
    print(f"Fine-tuned: {fine_params * 4 / 1e9:.2f} GB (float32)")
else:
    # Say why nothing was printed instead of finishing silently.
    print("Cannot compare models - models not loaded")

## Save Results

In [None]:
# Save comparison results to a file
import json
from datetime import datetime

def save_comparison_results(prompts, filename=None, max_new_tokens=80, temperature=0.7):
    """Generate with both models for every prompt and write the results as JSON.

    Args:
        prompts: Iterable of prompt strings.
        filename: Destination path. When ``None``, a timestamped
            ``model_comparison_<YYYYmmdd_HHMMSS>.json`` under ``../outputs``
            is used.
        max_new_tokens: Token budget passed to ``generate_text`` for each model.
        temperature: Sampling temperature passed to ``generate_text``.

    Returns:
        str: The path the results were written to.
    """
    if filename is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"../outputs/model_comparison_{timestamp}.json"

    # Create the target directory before generating anything, so a missing
    # folder cannot throw away the (slow) generations at write time.
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    results = [
        {
            "prompt": prompt,
            "original_output": generate_text(
                original_model, original_tokenizer, prompt,
                max_new_tokens=max_new_tokens, temperature=temperature,
            ),
            "finetuned_output": generate_text(
                finetuned_model, finetuned_tokenizer, prompt,
                max_new_tokens=max_new_tokens, temperature=temperature,
            ),
        }
        for prompt in prompts
    ]

    # Explicit UTF-8 plus ensure_ascii=False keeps non-ASCII model output
    # readable in the file and independent of the platform default encoding.
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    print(f"Results saved to: {filename}")
    return filename

# Persist the test-prompt comparison, but only when both models are loaded
if not (original_model and finetuned_model):
    print("Cannot save results - models not loaded")
else:
    save_comparison_results(test_prompts)

## Conclusion

This notebook allows you to:
- Compare the original GPT-2 model with your fine-tuned version
- Test different prompts and generation parameters
- Save comparison results for later analysis

You can modify the prompts, generation parameters, and model selection to experiment with different configurations.