# Debug: Main Notebook vs Debug Notebook Comparison

The debug notebook works correctly (example forms are detected), but the main notebook doesn't.
Let's find the difference.

In [None]:
# Setup
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import torch
from transformers import CLIPProcessor, CLIPModel
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from PIL import Image
import fitz
import json

In [None]:
# Load the main notebook to analyze
notebook_path = "zeroshot_form_or_contract.ipynb"
# Read the notebook JSON as UTF-8 explicitly so that parsing does not depend
# on the platform's default text encoding.
with open(notebook_path, 'r', encoding='utf-8') as f:
    notebook = json.load(f)

cells = notebook['cells']
print(f"Total cells in notebook: {len(cells)}")

# Locate the diagnostic cell by its marker text rather than by a fixed
# position, so the lookup keeps working when cells are added or removed.
# diagnostic_cell stays None when no code cell contains the marker.
diagnostic_cell = None
for i, cell in enumerate(cells):
    if cell['cell_type'] == 'code' and 'source' in cell:
        source = ''.join(cell['source'])
        if "DIAGNOSTIC: Testing detection on example forms" in source:
            diagnostic_cell = i
            print(f"Found diagnostic cell at index {i}")
            break
else:
    # Loop finished without `break`: make the miss visible instead of
    # leaving later checks to fail quietly on a None index.
    print("WARNING: diagnostic cell not found in notebook")

In [None]:
# Check 1: Model loading differences
print("=== CHECKING MODEL LOADING ===")

# Substrings that mark a source line as relevant to model configuration.
interesting_tokens = ('model_name', 'device', '.to(')

# Report every code cell that loads the large CLIP checkpoint.
for cell_idx, nb_cell in enumerate(cells):
    # Only code cells that actually carry source text are inspected.
    if nb_cell['cell_type'] != 'code' or 'source' not in nb_cell:
        continue

    cell_text = ''.join(nb_cell['source'])
    loads_clip = (
        "CLIPModel.from_pretrained" in cell_text
        and "clip-vit-large-patch14-336" in cell_text
    )
    if not loads_clip:
        continue

    print(f"\nCell {cell_idx}: CLIP model loading")
    print("Key lines:")
    for src_line in nb_cell['source']:
        if any(token in src_line for token in interesting_tokens):
            print(f"  {src_line.strip()}")

In [None]:
# Check 2: Example features loading
print("=== CHECKING EXAMPLE FEATURES LOADING ===")

# A cell counts as "loading example features" if it either initialises the
# list or appends to it.
feature_markers = ("example_features = []", "example_features.append")

for cell_idx, nb_cell in enumerate(cells):
    if nb_cell['cell_type'] != 'code' or 'source' not in nb_cell:
        continue

    cell_text = ''.join(nb_cell['source'])
    if not any(marker in cell_text for marker in feature_markers):
        continue

    print(f"\nCell {cell_idx}: Example features loading")

    # Report whether the cell calls np.linalg.norm anywhere.
    has_norm = "np.linalg.norm" in cell_text
    print(f"  Has normalization: {has_norm}")
    if not has_norm:
        continue

    # Show every line mentioning "norm" so the normalization can be compared.
    for src_line in nb_cell['source']:
        if "norm" in src_line:
            print(f"  Normalization line: {src_line.strip()}")

In [None]:
# Check 3: Detection function differences
print("=== CHECKING DETECTION FUNCTIONS ===")

# Collected as (cell index, function name) for every `def detect_form...`.
detection_functions = []
for cell_idx, nb_cell in enumerate(cells):
    if nb_cell['cell_type'] != 'code' or 'source' not in nb_cell:
        continue

    cell_text = ''.join(nb_cell['source'])
    if "def detect_form" not in cell_text:
        continue

    # The device check looks at the whole cell text, not just one function,
    # so it is the same for every function defined in this cell.
    device_report = (
        "  Uses device parameter: YES"
        if "device" in cell_text
        else "  Uses device parameter: NO"
    )

    for src_line in nb_cell['source']:
        if not src_line.strip().startswith("def detect_form"):
            continue
        # Everything before the first "(" minus the `def ` keyword is the name.
        func_name = src_line.partition('(')[0].replace('def ', '').strip()
        detection_functions.append((cell_idx, func_name))
        print(f"\nCell {cell_idx}: {func_name}")
        print(device_report)

In [None]:
# Check 4: Variable overwrites
import re

print("=== CHECKING FOR VARIABLE OVERWRITES ===")

# Track where key variables are assigned
key_vars = ['example_features', 'clip_model', 'clip_processor', 'device']
var_assignments = {var: [] for var in key_vars}

# One compiled pattern per variable. A match is a line that STARTS with the
# variable (after indentation) and then rebinds or mutates it:
#   var = ... / var=... / var += ...   (but not the comparison `var == ...`)
#   var.append(...) / var.extend(...)
# Anchoring at the line start avoids the false positives of a plain substring
# search, which also fired on `==` comparisons and on longer identifiers that
# merely end with the variable name (e.g. `test_device = ...`).
# Limitation: targets of tuple unpacking or chained assignment
# (`a, device = ...`) are not detected.
assignment_patterns = {
    var: re.compile(
        rf"^[ \t]*{re.escape(var)}[ \t]*(?:[-+*/|&]?=(?!=)|\.(?:append|extend)\()",
        re.MULTILINE,
    )
    for var in key_vars
}

for i, cell in enumerate(cells):
    if cell['cell_type'] == 'code' and 'source' in cell:
        source = ''.join(cell['source'])
        for var, pattern in assignment_patterns.items():
            if pattern.search(source):
                var_assignments[var].append(i)

for var, cells_list in var_assignments.items():
    if len(cells_list) > 1:
        print(f"\nWARNING: '{var}' is assigned/modified in multiple cells: {cells_list}")
    elif len(cells_list) == 1:
        print(f"'{var}' assigned in cell: {cells_list[0]}")
    else:
        # Surface the "never assigned" case too; it is as suspicious as a
        # double assignment when hunting for a notebook discrepancy.
        print(f"'{var}' is not assigned in any code cell")

In [None]:
# Check 5: Analyze the diagnostic cell specifically
print("=== ANALYZING DIAGNOSTIC CELL ===")

# Nothing beyond the header is printed when no diagnostic cell was located.
if diagnostic_cell is not None:
    diag_lines = cells[diagnostic_cell]['source']
    diag_source = ''.join(diag_lines)

    # Which detection function does the diagnostic cell call?
    if "detect_form_visual_clip" not in diag_source:
        print("WARNING: Diagnostic cell doesn't use detect_form_visual_clip!")
        # Show assignment-like lines that mention some detect_form* name.
        for src_line in diag_lines:
            if "detect_form" in src_line and "=" in src_line:
                print(f"  Found: {src_line.strip()}")
    else:
        print("Diagnostic cell uses: detect_form_visual_clip")

    # List every line that looks like a call, so the arguments can be read.
    print("\nChecking parameters passed to detection function:")
    call_lines = [
        src_line.strip()
        for src_line in diag_lines
        if "detect_form" in src_line and "(" in src_line
    ]
    for call_line in call_lines:
        print(f"  {call_line}")

In [None]:
# Check 6: Execution order issue
print("=== CHECKING EXECUTION ORDER ===")

# Index of the first code cell that defines detect_form_visual_clip,
# or None when no code cell defines it.
detect_func_cell = next(
    (
        i
        for i, cell in enumerate(cells)
        if cell['cell_type'] == 'code'
        and 'source' in cell
        and "def detect_form_visual_clip" in ''.join(cell['source'])
    ),
    None,
)

print(f"detect_form_visual_clip defined in cell: {detect_func_cell}")
print(f"Diagnostic cell is at: {diagnostic_cell}")

# Compare against None explicitly: a cell index of 0 is valid but falsy, so a
# plain truthiness test would silently skip the check for the first cell.
if detect_func_cell is not None and diagnostic_cell is not None:
    if detect_func_cell > diagnostic_cell:
        print("\nPROBLEM: Function is defined AFTER it's used!")
    else:
        print("\nOK: Function is defined before use")
else:
    # At least one of the two cells was not found, so there is no order to
    # compare; say so instead of printing nothing.
    print("\nCannot check order: function definition or diagnostic cell not found")

In [None]:
# Check 7: Device handling
print("=== CHECKING DEVICE HANDLING ===")

# List every code cell (and the matching lines) that moves something to the
# selected device. Any text containing ".to(device)" also contains
# "to(device)", so a single substring test covers both spellings.
for cell_idx, nb_cell in enumerate(cells):
    if nb_cell['cell_type'] != 'code' or 'source' not in nb_cell:
        continue

    cell_text = ''.join(nb_cell['source'])
    if "to(device)" not in cell_text:
        continue

    print(f"\nCell {cell_idx}: Moving to device")
    for src_line in nb_cell['source']:
        if "to(device)" in src_line:
            print(f"  {src_line.strip()}")

In [None]:
# Let's reproduce the exact setup from the main notebook
print("=== REPRODUCING MAIN NOTEBOOK SETUP ===")

# Load the CLIP model and its processor from the same checkpoint name.
model_name = "openai/clip-vit-large-patch14-336"
clip_model = CLIPModel.from_pretrained(model_name)
clip_processor = CLIPProcessor.from_pretrained(model_name)

# Backend preference: CUDA first, then MPS, falling back to CPU.
if torch.cuda.is_available():
    backend = "cuda"
elif torch.backends.mps.is_available():
    backend = "mps"
else:
    backend = "cpu"
device = torch.device(backend)

# Move the model to the chosen device and switch it to evaluation mode.
clip_model = clip_model.to(device)
clip_model.eval()

print(f"Model loaded on {device}")

In [None]:
# Load ONE example exactly as main notebook
example_forms_dir = "../../data/raw/_exampleforms/"
# os.listdir returns entries in arbitrary order; sort so that "the first
# example" is the same file on every run and on every platform.
example_files = sorted(f for f in os.listdir(example_forms_dir) if f.endswith('.pdf'))

if example_files:
    # Load first example
    test_file = example_files[0]
    pdf_path = os.path.join(example_forms_dir, test_file)

    # Render the first page to a PIL image. The context manager closes the
    # document even if opening the page or rendering it raises.
    with fitz.open(pdf_path) as pdf:
        page = pdf[0]
        pix = page.get_pixmap()
        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

    # Extract features WITH device handling
    print("\nTesting feature extraction with explicit device handling:")

    # Method 1: As in main notebook - move the whole processor output at once.
    inputs1 = clip_processor(images=img, return_tensors="pt").to(device)
    with torch.no_grad():
        features1 = clip_model.get_image_features(**inputs1)
        features1_cpu = features1.cpu().numpy()
        # Normalize each row so it has unit L2 norm.
        features1_norm = features1_cpu / np.linalg.norm(features1_cpu, axis=1, keepdims=True)

    print(f"Method 1 - Features shape: {features1_norm.shape}")
    print(f"Method 1 - Norm: {np.linalg.norm(features1_norm)}")

    # Method 2: Without explicit to(device) on inputs
    inputs2 = clip_processor(images=img, return_tensors="pt")
    # Move each tensor to device individually; non-tensor values pass through.
    inputs2 = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in inputs2.items()}
    with torch.no_grad():
        features2 = clip_model.get_image_features(**inputs2)
        features2_cpu = features2.cpu().numpy()
        features2_norm = features2_cpu / np.linalg.norm(features2_cpu, axis=1, keepdims=True)

    print(f"\nMethod 2 - Features shape: {features2_norm.shape}")
    print(f"Method 2 - Norm: {np.linalg.norm(features2_norm)}")

    # Compare: both ways of moving inputs should yield the same features.
    print(f"\nMethods produce same result: {np.allclose(features1_norm, features2_norm)}")
else:
    # Without an example, `img` and `features1_norm` are never defined; say so
    # here rather than leaving a later cell to fail with a NameError.
    print(f"No .pdf files found in {example_forms_dir}; skipping feature extraction test")

In [None]:
# Final check: Run the exact detection function from main notebook
def detect_form_visual_clip(image, clip_model, clip_processor, device,
                           positive_features=None, negative_features=None,
                           similarity_threshold=0.7, negative_threshold=0.7):
    """
    Pure visual form detection using CLIP features
    No text detection or OCR - just visual similarity

    Args:
        image: Image passed to ``clip_processor`` via ``images=``.
        clip_model: Model exposing ``get_image_features(**inputs)``.
        clip_processor: Callable that builds the model inputs; its result
            must support ``.to(device)``.
        device: Device the processed inputs are moved to.
        positive_features: Optional reference features of known forms. May be
            a list of ``(1, dim)`` arrays, a list of flat ``(dim,)`` vectors,
            or one stacked 2-D array with one row per example.
        negative_features: Optional reference features of non-forms, in the
            same formats as ``positive_features``.
        similarity_threshold: The best positive similarity must be strictly
            greater than this for the image to count as form-like.
        negative_threshold: The best negative similarity must be strictly
            lower than this for the image to count as not-negative-like.

    Returns:
        dict with keys ``is_form``, ``confidence``,
        ``max_positive_similarity``, ``max_negative_similarity``,
        ``positive_similarities`` and ``negative_similarities``. If feature
        extraction fails, the error is printed and the default result
        (``is_form`` False, zero scores, empty lists) is returned.
    """
    result = {
        'is_form': False,
        'confidence': 0,
        'max_positive_similarity': 0,
        'max_negative_similarity': 0,
        'positive_similarities': [],
        'negative_similarities': []
    }

    # Extract visual features from current image
    try:
        inputs = clip_processor(images=image, return_tensors="pt").to(device)

        with torch.no_grad():
            features = clip_model.get_image_features(**inputs)
            features = features.cpu().numpy()
            # Normalize features
            features = features / np.linalg.norm(features, axis=1, keepdims=True)
    except Exception as e:
        # Deliberate best-effort: report the problem and answer "not a form"
        # instead of aborting the caller's loop over many images.
        print(f"Error extracting features: {e}")
        return result

    def _similarities_to(references):
        """Return the cosine similarity of the image to each reference."""
        if references is None:
            return []
        # Iterating (rather than testing truthiness) works for lists and for
        # stacked numpy arrays alike; `if array:` raises for multi-element
        # arrays. np.atleast_2d turns a flat (dim,) vector into the (1, dim)
        # row that cosine_similarity requires and leaves 2-D input untouched.
        return [
            cosine_similarity(features, np.atleast_2d(reference))[0][0]
            for reference in references
        ]

    # Check similarity to positive examples
    result['positive_similarities'] = _similarities_to(positive_features)
    if result['positive_similarities']:
        result['max_positive_similarity'] = max(result['positive_similarities'])
        is_like_positive = result['max_positive_similarity'] > similarity_threshold
    else:
        # No positive references: nothing can be form-like.
        is_like_positive = False

    # Check similarity to negative examples
    result['negative_similarities'] = _similarities_to(negative_features)
    if result['negative_similarities']:
        result['max_negative_similarity'] = max(result['negative_similarities'])
        is_not_like_negative = result['max_negative_similarity'] < negative_threshold
    else:
        # No negative references: nothing can rule the image out.
        is_not_like_negative = True

    # Decision: must be like positive AND not like negative.
    # bool() keeps the flag a plain Python bool rather than a numpy scalar.
    result['is_form'] = bool(is_like_positive and is_not_like_negative)

    # Confidence score
    if result['is_form']:
        # High positive similarity, low negative similarity
        result['confidence'] = result['max_positive_similarity'] * (1 - result['max_negative_similarity'] * 0.5)
    else:
        result['confidence'] = 0

    return result

# Test with just the one feature
# `img` and `features1_norm` come from the example-loading cell: the image is
# compared against its own feature vector, with no negative examples.
print("\nTesting detection with single example feature:")
result = detect_form_visual_clip(
    image=img,
    clip_model=clip_model,
    clip_processor=clip_processor,
    device=device,
    positive_features=[features1_norm],
    negative_features=None,
    similarity_threshold=0.7,
    negative_threshold=0.7,
)
print(f"Result: {result}")
print("\nThis should show is_form=True with similarity ~1.0")