In [None]:
import json
import pandas as pd
import numpy as np
import random
from IPython.display import display, HTML, IFrame
from typing import Dict, List, Tuple
import warnings
# NOTE(review): this silences every warning category for the whole kernel
# session, including deprecation and numerical warnings raised further down.
warnings.filterwarnings('ignore')

# One seed shared by both RNGs so the sampled sounds are reproducible
SEED = 42
random.seed(SEED)
np.random.seed(SEED)


In [None]:
def show_sound_player(sound_id):
    """Display a Freesound link and an embedded player for one sound.

    Args:
        sound_id: Freesound sound ID; interpolated into both URLs.

    Renders, through IPython's ``display``, an HTML block containing a link
    that opens the sound on Freesound in a new tab and an iframe pointing at
    the Freesound embed player. Returns ``None``.
    """
    # Freesound's sound page lives at /people/<username>/sounds/<id>/ and the
    # uploader's username is not known here, so link through the short
    # permalink /s/<id>/ instead of a path with the username segment missing.
    freesound_url = f'https://freesound.org/s/{sound_id}/'
    embed_url = f'https://freesound.org/embed/sound/iframe/{sound_id}/simple/medium/'

    # Display both a clickable link and embedded player
    display(HTML(f'''
    <div style="margin: 10px 0;">
        <p><strong>Listen:</strong> <a href="{freesound_url}" target="_blank">Open in Freesound</a></p>
        <iframe src="{embed_url}" width="696" height="100" frameborder="0" scrolling="no"></iframe>
    </div>
    '''))


In [None]:
# Load ground truth data
print("Loading ground truth data...")
# Pin the encoding: without it open() falls back to the platform locale, so
# non-ASCII titles or tags could decode differently from one machine to another.
with open('data/input_ground_truth_pairs.json', 'r', encoding='utf-8') as f:
    ground_truth_data = json.load(f)

# Create a mapping from sound_id to ground truth info
# (if a sound_id appears more than once, the last entry wins)
gt_lookup = {item['sound_id']: item for item in ground_truth_data}
print(f"Loaded {len(gt_lookup)} ground truth entries")


In [None]:
# Load prediction results
print("Loading prediction results...")


def _load_results(path):
    """Read one evaluation results file and index its per-sound records.

    Args:
        path: Path to a JSON file whose top-level object has a
            'detailed_results' list of records, each with a 'sound_id' key.

    Returns:
        Tuple ``(data, lookup)``: the parsed JSON object, and a dict mapping
        each record's 'sound_id' to the record (last one wins on duplicates).
    """
    # Explicit UTF-8 so decoding does not depend on the platform locale
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data, {item['sound_id']: item for item in data['detailed_results']}


# CLAP Baseline
clap_baseline_data, clap_baseline_lookup = _load_results(
    'eval/clap_baseline_results.json'
)

# CLAP DF
clap_df_data, clap_df_lookup = _load_results(
    'eval/clap_baseline_df_alpha0.7_results.json'
)

# CLAP DF + SBERT
clap_sbert_data, clap_sbert_lookup = _load_results(
    'eval/clap_baseline_df_sbert_alpha0.7_threshold0.7_results.json'
)

print(f"Loaded CLAP Baseline: {len(clap_baseline_lookup)} results")
print(f"Loaded CLAP DF: {len(clap_df_lookup)} results")
print(f"Loaded CLAP DF+SBERT: {len(clap_sbert_lookup)} results")


In [None]:
# Find sound IDs that have results in all approaches
all_sound_ids = set(gt_lookup.keys())
sound_ids_with_all_results = (
    all_sound_ids &
    set(clap_baseline_lookup.keys()) &
    set(clap_df_lookup.keys()) &
    set(clap_sbert_lookup.keys())
)

print(f"Total sounds with all results: {len(sound_ids_with_all_results)}")

# Randomly sample up to SAMPLE_SIZE sounds
SAMPLE_SIZE = 50
sample_size = min(SAMPLE_SIZE, len(sound_ids_with_all_results))
# Sample from a sorted list rather than from the set's iteration order: that
# order is an implementation detail (and for string IDs changes with hash
# randomization between kernels), so seeding the RNG alone would not make the
# selection reproducible.
sampled_sound_ids = random.sample(sorted(sound_ids_with_all_results), sample_size)

print(f"Selected {len(sampled_sound_ids)} sounds for analysis")


In [None]:
def format_tags_with_hits(predicted_tags: List[str], ground_truth_tags: List[str], hits: List[str]) -> str:
    """Join predicted tags into one string, marking the hits in bold.

    Args:
        predicted_tags: Tags to render, in display order.
        ground_truth_tags: Part of the signature but not read by this function.
        hits: Tags that should be wrapped in ``**...**`` markers.

    Returns:
        The tags joined with ", ", where every tag found in ``hits`` is
        surrounded by double asterisks.
    """
    return ", ".join(
        f"**{tag}**" if tag in hits else tag
        for tag in predicted_tags
    )

def get_semantic_hits(predicted_tags: List[str], ground_truth_tags: List[str], semantic_hits: List[str]) -> List[str]:
    """Return the semantic hits that are not also exact matches.

    An exact match is any tag present in both ``predicted_tags`` and
    ``ground_truth_tags``. The order (and any duplicates) of
    ``semantic_hits`` is kept in the result.
    """
    exact_matches = set(predicted_tags).intersection(ground_truth_tags)
    semantic_only = []
    for candidate in semantic_hits:
        if candidate in exact_matches:
            continue
        semantic_only.append(candidate)
    return semantic_only

def analyze_sound(sound_id: int) -> Dict:
    """Gather ground truth and per-approach predictions for one sound.

    Reads the module-level lookups ``gt_lookup``, ``clap_baseline_lookup``,
    ``clap_df_lookup`` and ``clap_sbert_lookup``.

    Args:
        sound_id: Key expected to be present in all four lookups.

    Returns:
        Dict with 'sound_id', 'title', 'input_tags', 'ground_truth' and an
        'approaches' dict keyed by approach name. Every approach entry holds
        the first 10 predicted tags plus its hits and precision/recall/F1
        values; the 'CLAP DF+SBERT' entry additionally carries
        'semantic_hits' (semantic hits that are not exact matches),
        'all_hits' and 'num_semantic_hits'.

    Raises:
        KeyError: If ``sound_id`` or a required record field is missing.
    """
    def _summarize(record):
        # Common shape for the approaches that report one plain 'hits' list
        return {
            'predictions': record['predicted_tags'][:10],
            'hits': record['hits'],
            'num_hits': record['num_hits'],
            'precision': record['precision_at_10'],
            'recall': record['recall'],
            'f1': record['f1_score'],
        }

    truth = gt_lookup[sound_id]
    baseline_record = clap_baseline_lookup[sound_id]
    df_record = clap_df_lookup[sound_id]
    sbert_record = clap_sbert_lookup[sound_id]

    # Semantic hits of the SBERT approach with the exact matches filtered out
    semantic_only = get_semantic_hits(
        sbert_record['predicted_tags'],
        sbert_record['ground_truth_tags'],
        sbert_record.get('semantic_hits', []),
    )

    # Exact hits followed by the semantic hits as reported in the record;
    # only the exact hits are comparable to the other approaches' 'hits'
    exact_hits = sbert_record.get('exact_hits', [])
    combined_hits = exact_hits + sbert_record.get('semantic_hits', [])

    approaches = {
        'CLAP Baseline': _summarize(baseline_record),
        'CLAP DF': _summarize(df_record),
        'CLAP DF+SBERT': {
            'predictions': sbert_record['predicted_tags'][:10],
            'hits': exact_hits,  # exact hits only, for consistency
            'semantic_hits': semantic_only,
            'all_hits': combined_hits,  # both exact and semantic
            'num_hits': sbert_record.get('num_exact_hits', 0),
            'num_semantic_hits': sbert_record.get('num_semantic_hits', 0),
            'precision': sbert_record['precision_at_10'],
            'recall': sbert_record['recall'],
            'f1': sbert_record['f1_score'],
        },
    }

    return {
        'sound_id': sound_id,
        'title': truth['title'],
        'input_tags': truth['input_tags'],
        'ground_truth': truth['ground_truth_tags'],
        'approaches': approaches,
    }


In [None]:
# Run the per-sound analysis over the sample, collecting what succeeds
print("Analyzing sampled sounds...")
analysis_results = []
for sound_id in sampled_sound_ids:
    try:
        result = analyze_sound(sound_id)
    except Exception as e:
        # Best effort: report the failing sound and carry on with the rest
        print(f"Error analyzing sound {sound_id}: {type(e).__name__}: {e}")
    else:
        analysis_results.append(result)

print(f"Successfully analyzed {len(analysis_results)} sounds")


In [None]:
# Display results for manual review: one section per analyzed sound with its
# metadata, an audio player and the predictions of every approach.
for i, result in enumerate(analysis_results):
    print(f"\n{'='*80}")
    print(f"SOUND {i+1}/{len(analysis_results)}: {result['sound_id']}")
    print(f"{'='*80}")

    # Basic info
    print(f"\n**Title:** {result['title']}")
    print(f"**Input Tags:** {', '.join(result['input_tags'])}")
    print(f"**Ground Truth:** {', '.join(result['ground_truth'])}")

    # Audio player
    print("\n**Audio Player:**")
    show_sound_player(result['sound_id'])

    # Results table
    print("\n**Results Comparison:**")

    # Display predictions for each approach in a readable format
    for approach_name, approach_data in result['approaches'].items():
        print(f"\n**{approach_name}:**")
        predictions_formatted = format_tags_with_hits(
            approach_data['predictions'],
            result['ground_truth'],
            approach_data['hits']
        )
        print(f"  Predictions: {predictions_formatted}")
        # Only approaches that report semantic matches carry this key; show
        # them so the reviewer can see matches beyond the exact hits, which
        # are the only ones bolded above and counted below.
        if 'semantic_hits' in approach_data:
            semantic_text = ', '.join(map(str, approach_data['semantic_hits'])) or 'none'
            print(f"  Semantic-only hits: {semantic_text}")
        print(f"  Hits: {len(approach_data['hits'])}, Precision@10: {approach_data['precision']:.3f}, Recall: {approach_data['recall']:.3f}, F1: {approach_data['f1']:.3f}")

    # Add separator between sounds (not after the last one)
    if i < len(analysis_results) - 1:
        print("\n\n")


In [None]:
# Calculate summary statistics for the sample
print("\n" + "="*80)
print("SUMMARY STATISTICS FOR SAMPLE")
print("="*80)

summary_stats = {}
approach_names = ['CLAP Baseline', 'CLAP DF', 'CLAP DF+SBERT']

if not analysis_results:
    # With nothing analyzed every mean would be NaN and the "best approach"
    # lookups below would have no row to pick, so stop here with a message.
    print("\nSample size: 0 sounds")
    print("No sounds were analyzed; skipping summary statistics.")
else:
    for approach in approach_names:
        hits = [len(result['approaches'][approach]['hits']) for result in analysis_results]
        precisions = [result['approaches'][approach]['precision'] for result in analysis_results]
        recalls = [result['approaches'][approach]['recall'] for result in analysis_results]
        f1s = [result['approaches'][approach]['f1'] for result in analysis_results]

        summary_stats[approach] = {
            'avg_hits': np.mean(hits),
            'avg_precision': np.mean(precisions),
            'avg_recall': np.mean(recalls),
            'avg_f1': np.mean(f1s),
            'total_hits': sum(hits)
        }

    # Display summary: one row per approach. Columns are renamed by key so the
    # labels cannot drift out of step with the dict built above.
    summary_df = pd.DataFrame(summary_stats).T
    summary_df = summary_df.round(3)
    summary_df = summary_df.rename(columns={
        'avg_hits': 'Avg Hits/Sound',
        'avg_precision': 'Avg Precision@10',
        'avg_recall': 'Avg Recall',
        'avg_f1': 'Avg F1',
        'total_hits': 'Total Hits',
    })

    print(f"\nSample size: {len(analysis_results)} sounds")
    display(summary_df)

    # Best performing approach (idxmax returns the first row holding the
    # maximum, so ties resolve to the earliest approach in approach_names)
    best_approach = summary_df['Avg F1'].idxmax()
    best_f1 = summary_df.loc[best_approach, 'Avg F1']
    print(f"\nBest performing approach by F1: {best_approach} (F1: {best_f1:.3f})")

    best_hits_approach = summary_df['Total Hits'].idxmax()
    best_hits = summary_df.loc[best_hits_approach, 'Total Hits']
    print(f"Most hits overall: {best_hits_approach} ({int(best_hits)} hits)")
