# 🐸 Comprehensive BirdNET Analysis

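This notebook provides a comprehensive analysis of the BirdNET-based frog species classification experiment results for a single MLflow run. By default it analyses run ID `28b89d862b80460bad606ffb57913680`; set the `MLFLOW_RUN_ID` environment variable to analyse a different run (the figures in the overview below describe the default run).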

## 🎯 Overview
- **Experiment**: BirdNET 48kHz averaging multiclass balanced with "Other" species
- **Overall Accuracy**: 82.63%
- **Target Species**: 15 frog species + "Other" class
- **Feature Extraction**: BirdNET embeddings (1024-dim) with element-wise averaging
- **Test Dataset**: 2,700 samples



In [None]:
# Import required libraries
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import mlflow
from pathlib import Path
from IPython.display import display, HTML, Image
import warnings
warnings.filterwarnings('ignore')

# Plotting defaults shared by every figure in this notebook
plt.style.use('default')
sns.set_palette("husl")
plt.rcParams.update({
    'figure.figsize': (12, 8),  # (width, height) in inches
    'font.size': 12,
})

print("📦 Libraries imported successfully!")


In [None]:
# Import required libraries
# NOTE(review): this cell repeats the preceding import/setup cell line for line.
# Re-running it is harmless (imports are cached and the settings are idempotent),
# so it is a candidate for removal.
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import mlflow
from pathlib import Path
from IPython.display import display, HTML, Image
import warnings
warnings.filterwarnings('ignore')  # no category given: silences every warning for the rest of the session

# Set up plotting style
plt.style.use('default')
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = (12, 8)  # default figure size in inches (width, height)
plt.rcParams['font.size'] = 12

print("📦 Libraries imported successfully!")


In [None]:
# Resolve which MLflow run to analyse.
# The training script may export MLFLOW_RUN_ID; otherwise fall back to the
# reference run below (edit DEFAULT_RUN_ID to analyse a different run by hand).
import os

DEFAULT_RUN_ID = "28b89d862b80460bad606ffb57913680"

# `or` (instead of a getenv default) also covers a variable that is set but
# empty or blank, which would otherwise reach MLflow as an invalid run ID.
TARGET_RUN_ID = os.getenv('MLFLOW_RUN_ID', '').strip() or DEFAULT_RUN_ID

In [None]:
# Configure the MLflow tracking backend for the current environment.
# The presence of DATABRICKS_RUNTIME_VERSION is used as the environment switch.
IS_DATABRICKS = os.environ.get("DATABRICKS_RUNTIME_VERSION") is not None

if not IS_DATABRICKS:
    # Local development: project root is taken to be two levels above the working directory
    ROOT_DIR = Path(os.getcwd()).parent.parent
    default_tracking_uri = f"sqlite:///{ROOT_DIR}/mlops/mlflow/tracking.db"
    MLFLOW_TRACKING_URI = os.getenv('MLFLOW_TRACKING_URI', default_tracking_uri)
    mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
    print(f"🔗 Local development - MLflow URI: {mlflow.get_tracking_uri()}")
else:
    # Databricks: the tracking URI is not touched here
    print("🔗 Running on Databricks - Using Databricks MLflow tracking")
    ROOT_DIR = Path.cwd().parent  # Adjust for bundle structure

# Summary of the resolved environment
environment_name = 'Databricks' if IS_DATABRICKS else 'Local'
print(f"📁 Root DIR: {ROOT_DIR}")
print(f"🔗 MLflow tracking URI: {mlflow.get_tracking_uri()}")
print(f"🎯 Target run ID: {TARGET_RUN_ID}")
print(f"🔄 Environment: {environment_name}")


In [None]:
# Load the target MLflow run and expose its params, metrics and artifact location
from urllib.parse import urlparse
from urllib.request import url2pathname

run = mlflow.get_run(TARGET_RUN_ID)
experiment = mlflow.get_experiment(run.info.experiment_id)

# Extract data from run.
# `params` is copied because a later cell adds fallback entries to it; without
# the copy those writes would land in the Run object's own dictionary.
params = dict(run.data.params)
metrics = run.data.metrics

# Turn the artifact URI into a filesystem path.
# file: URIs are decoded properly (handles `file:/x`, `file:///x` and
# percent-encoded characters); anything else is kept verbatim, which matches
# the previous behaviour for plain local paths and non-file schemes.
parsed_artifact_uri = urlparse(run.info.artifact_uri)
if parsed_artifact_uri.scheme == "file":
    artifact_path = Path(url2pathname(parsed_artifact_uri.path))
else:
    artifact_path = Path(run.info.artifact_uri)

print(f"✅ Loaded run: {run.info.run_id}")
print(f"✅ Experiment: {experiment.name}")
print(f"📅 Start time: {pd.to_datetime(run.info.start_time, unit='ms')}")
print(f"📈 Status: {run.info.status}")
print(f"📁 Artifact path: {artifact_path}")

## ⚙️ Experiment Configuration

In [None]:
# Display key experiment parameters - when the run has no logged params,
# fall back to the key=value text file stored with the run's artifacts.
if not params:
    print("ℹ️ Loading configuration from artifacts...")
    exp_config_path = artifact_path / "config" / "experiment_config.txt"
    try:
        if exp_config_path.exists():
            with open(exp_config_path, 'r') as f:
                for raw_line in f:
                    entry = raw_line.strip()
                    # Skip blank lines, comments (including indented ones) and
                    # anything that is not a key=value pair.
                    if not entry or entry.startswith('#') or '=' not in entry:
                        continue
                    key, value = entry.split('=', 1)
                    key = key.strip()
                    if key:  # ignore malformed "=value" lines
                        params[key] = value.strip()
    except Exception as e:
        # Best effort: a missing or unreadable file must not stop the notebook
        print(f"ℹ️ Could not load config file: {e}")

# Load configuration from the JSON config files stored with the run's artifacts
import json
import re


def extract_config_number(config_text, key):
    """Return the number assigned to ``key`` in a ``key=value`` style config string.

    Args:
        config_text: Text such as ``"Config(sr=48000, window_duration=30)"``.
            Non-string inputs are converted with ``str()`` first.
        key: Exact setting name to look for; it must not be the tail of a
            longer identifier (``"sr"`` does not match ``"target_sr"``).

    Returns:
        The value as a ``float``, or ``None`` when ``key`` is absent or is not
        followed by a plain decimal number.
    """
    pattern = r"(?<!\w)" + re.escape(key) + r"\s*=\s*(-?\d+(?:\.\d+)?)"
    match = re.search(pattern, str(config_text))
    return float(match.group(1)) if match else None


config_values = {}
try:
    config_dir = artifact_path / "config"

    # Load data selector config
    data_selector_config_path = config_dir / "data_selector_config.json"
    if data_selector_config_path.exists():
        with open(data_selector_config_path, 'r') as f:
            data_selector_config = json.load(f)
        config_values.update({
            'max_samples_per_class': data_selector_config.get('max_samples_per_class', 1000),
            'other_boost_factor': data_selector_config.get('other_species_boost_factor', 3.0),
            'sampling_strategy': data_selector_config.get('sampling_strategy', 'downsample')
        })
        # Only record a species count when the list is actually present;
        # otherwise downstream lookups fall back to the run params / defaults
        # instead of reporting 0 target species.
        target_species = data_selector_config.get('target_species') or []
        if target_species:
            config_values['num_target_species'] = len(target_species)

    # Load preprocessor config
    preprocessor_config_path = config_dir / "preprocessor_config.json"
    if preprocessor_config_path.exists():
        with open(preprocessor_config_path, 'r') as f:
            preprocessor_config = json.load(f)

        # The chunking / BirdNET settings are stored as strings; pull the
        # numbers out rather than testing for specific hard-coded values.
        chunking_str = preprocessor_config.get('chunking', '')
        sample_rate = extract_config_number(chunking_str, 'sr')
        if sample_rate is not None:
            config_values['audio_sr'] = int(sample_rate)
        window_duration = extract_config_number(chunking_str, 'window_duration')
        if window_duration is not None:
            # Keep whole seconds as an int so the value displays as "30", not "30.0"
            config_values['window_duration'] = (
                int(window_duration) if window_duration.is_integer() else window_duration
            )

        # Parse BirdNET config
        birdnet_str = preprocessor_config.get('birdnet_config', '')
        segment_duration = extract_config_number(birdnet_str, 'segment_duration')
        if segment_duration is not None:
            config_values['segment_duration'] = segment_duration

        config_values['feature_type'] = 'BirdNET embeddings'

except Exception as e:
    # Best effort: a missing or malformed config file must not stop the notebook
    print(f"ℹ️ Could not load all config files: {e}")

# Create the parameter summary table.
# Precedence for each value: config files, then logged run params, then a default.

# Resolve the species count once so that "Number of Target Species" and
# "Total Classes" can never disagree. `or` treats a missing or zero count from
# the config files as "not available".
num_target_display = config_values.get('num_target_species') or params.get('num_target_species', '15')
try:
    total_classes_display = str(int(num_target_display) + 1)  # target species + Other
except (TypeError, ValueError):
    total_classes_display = '16'  # default: 15 target species + Other

key_params = {
    'Number of Target Species': str(num_target_display),
    'Total Classes': total_classes_display,
    'Max Samples per Class': str(config_values.get('max_samples_per_class', params.get('max_samples_per_class', '1000'))),
    'Audio Sample Rate': f"{config_values.get('audio_sr', params.get('audio_sr', '48000'))} Hz",
    'Window Duration': f"{config_values.get('window_duration', params.get('window_duration', '30'))} seconds",
    'BirdNET Segment Duration': f"{config_values.get('segment_duration', params.get('segment_duration', '3'))} seconds",
    'Feature Type': config_values.get('feature_type', params.get('feature_type', 'BirdNET embeddings')),
    'Model Architecture': params.get('model_architecture', 'Dense layers'),
    'Hidden Layers': params.get('hidden_layers', '[256, 128]'),
    'Dropout Rate': params.get('dropout_rate', '0.3'),
    'Batch Size': params.get('batch_size', '32'),
    'Learning Rate': params.get('learning_rate', '0.001'),
    'Epochs': params.get('epochs', '100'),
    'Other Species Boost Factor': f"{config_values.get('other_boost_factor', params.get('other_boost_factor', '3.0'))}x"
}

params_df = pd.DataFrame(list(key_params.items()), columns=['Parameter', 'Value'])
display(params_df.style.set_properties(**{'text-align': 'left'}).hide(axis='index'))

# Store key values for later use.
# The species count follows the same precedence as the displayed table (config
# files, then run params, then default); `or` skips a missing or zero config
# value. The class count defaults to "target species + Other" so the two
# numbers stay consistent when `total_num_classes` was not logged.
try:
    num_target_species = int(
        config_values.get('num_target_species') or params.get('num_target_species', 15)
    )
    num_classes = int(params.get('total_num_classes', num_target_species + 1))
except (ValueError, TypeError):
    num_classes = 16
    num_target_species = 15

print(f"\n🎯 Configuration loaded: {len(key_params)} parameters")


## 📊 Per-Species Classification Performance

In [None]:
# Fetch the evaluation artifacts of the target run and build class-id lookups
import tempfile
import os


# Both parquet files are downloaded into a throwaway directory and parsed
# before that directory is removed again.
with tempfile.TemporaryDirectory() as download_dir:
    # Per-class metrics table
    classification_report_path = mlflow.artifacts.download_artifacts(
        artifact_path="data_evaluation/classification_report.parquet",
        run_id=TARGET_RUN_ID,
        dst_path=download_dir,
    )
    classification_report_df = pd.read_parquet(classification_report_path)

    # class_id -> species lookup table
    species_mapping_path = mlflow.artifacts.download_artifacts(
        artifact_path="data_evaluation/species_id_class_mapping.parquet",
        run_id=TARGET_RUN_ID,
        dst_path=download_dir,
    )
    species_mapping_df = pd.read_parquet(species_mapping_path)

# Lookups keyed by class_id; when the table has no species_id column the
# class_id itself is used as the ID.
class_ids = species_mapping_df['class_id']
if 'species_id' in species_mapping_df.columns:
    species_ids = species_mapping_df['species_id']
else:
    species_ids = class_ids

class_to_species = {
    class_id: name for class_id, name in zip(class_ids, species_mapping_df['species_name'])
}
class_to_species_id = {
    class_id: species_id for class_id, species_id in zip(class_ids, species_ids)
}
print(f"✅ Loaded species mapping for {len(class_to_species)} classes")

# Translate a classification-report row index into a readable label
def map_class_to_species_with_id(idx):
    """Return a display label for one row index of the classification report.

    Digit-only indices (ints or strings) are read as class ids. Ids 16, 17 and
    18 are the summary rows and map to 'accuracy', 'macro avg' and
    'weighted avg'; every other id becomes "<species name> (ID: <species id>)",
    using the module-level ``class_to_species`` / ``class_to_species_id``
    lookups with ``Class_<id>`` / the class id as fallbacks. Anything else is
    returned as ``str(idx)``.
    """
    is_numeric_index = isinstance(idx, (int, str)) and str(idx).isdigit()
    if not is_numeric_index:
        return str(idx)

    class_id = int(idx)

    # Summary statistics rows that appear after the per-class rows
    summary_labels = {16: 'accuracy', 17: 'macro avg', 18: 'weighted avg'}
    if class_id in summary_labels:
        return summary_labels[class_id]

    species_name = class_to_species.get(class_id, f'Class_{class_id}')
    species_id = class_to_species_id.get(class_id, class_id)
    return f"{species_name} (ID: {species_id})"

# Relabel the report rows with species names
classification_report_df.index = list(
    map(map_class_to_species_with_id, classification_report_df.index)
)
classification_report_df.index.name = 'Species (ID)'

# Split the aggregate rows from the per-class rows
summary_labels = ['accuracy', 'macro avg', 'weighted avg']
is_summary_row = classification_report_df.index.isin(summary_labels)
summary_rows = classification_report_df[is_summary_row]
individual_classes = classification_report_df[~is_summary_row].sort_values(
    'f1-score', ascending=False
)

print("📊 CLASSIFICATION REPORT - Individual Species Performance (Ordered by F1-Score)")
print("=" * 90)

# Colour the three score columns from red (low) to green (high)
score_columns = ['precision', 'recall', 'f1-score']
styled_report = (
    individual_classes.round(3)
    .style.background_gradient(cmap='RdYlGn', subset=score_columns)
    .format(precision=3)
)
display(styled_report)


## 🔍 Enhanced "Other" Species Analysis

This section provides detailed analysis of how the "Other" class performs, including which non-target species are most commonly confused with target species.


In [None]:
# Download the test-set ID table of the target run and load it into memory
with tempfile.TemporaryDirectory() as download_dir:
    test_ids_path = mlflow.artifacts.download_artifacts(
        artifact_path="data_input/test_ids.parquet",
        run_id=TARGET_RUN_ID,
        dst_path=download_dir,
    )
    # Read before the temporary directory is cleaned up
    test_ids_df = pd.read_parquet(test_ids_path)

In [None]:
import pandas as pd

# Keep only the test samples labelled 15 (the 'Other' class)
is_other_label = test_ids_df['label'] == 15
label_15_df = test_ids_df[is_other_label]

# Number of samples per species among those rows, most frequent first
species_counts = label_15_df['species_name'].value_counts()

# Show the resulting counts
print("Full list of species with label = 15 and their counts:\n")
print(species_counts)


In [None]:
# Headline numbers for the species that make up the 'Other' class
other_species_names = label_15_df['species_name']
print(f"Number of unique 'Other' species: {other_species_names.nunique()}")

print("Top 10 'Other' species by count:")
top_ten_species = species_counts.head(10)
print(top_ten_species)

# Species represented by exactly one sample
singleton_species = species_counts[species_counts == 1]
print("Species with only 1 sample:", len(singleton_species))

In [None]:
# Long-tail statistics of the 'Other' class: how many species are rare, and
# how much of the class the five most frequent species account for.
few_samples = species_counts[species_counts < 5]
top_5_sum = species_counts.head(5).sum()
total = species_counts.sum()

if len(species_counts) == 0 or total == 0:
    # No 'Other' samples at all: both ratios below would divide by zero
    print("No 'Other' samples found - skipping long-tail statistics.")
else:
    print(f"Species with <5 samples: {len(few_samples)} ({len(few_samples)/len(species_counts)*100:.1f}%)")
    print(f"Top 5 'Other' species account for {top_5_sum}/{total} ({top_5_sum/total*100:.1f}%) of 'Other' samples")

In [None]:
# Find the report row for class 15 (Other species).
# The relabelled index reads "<name> (ID: <species_id>)", so searching for the
# text "15" would match whichever species has species_id 15 rather than class
# 15. Build the exact label for class 15 and match on that instead.
other_label = map_class_to_species_with_id(15)
other_row = classification_report_df.loc[classification_report_df.index == other_label]

if other_row.empty:
    # Fallback for reports whose index was not relabelled or only carries names
    index_as_text = classification_report_df.index.astype(str)
    other_row = classification_report_df.loc[
        index_as_text.str.contains(r'\b15\b|Other', case=False, regex=True)
    ]

print("=== Overall 'Other Species' Category Performance ===")
print(other_row[['precision', 'recall', 'f1-score', 'support']])

In [None]:
# Most frequent species inside the 'Other' class
print("Top 10 'Other' species by count:")
print(species_counts.head(10))

# Per-species duration statistics, only when that column is available
if 'duration_seconds' in label_15_df.columns:
    duration_stats = label_15_df.groupby('species_name')['duration_seconds'].describe()
    print(duration_stats.head(10))

# Horizontal bar chart of the 20 most frequent species
other_species_ax = species_counts.head(20).plot.barh(
    figsize=(8, 6), title="Top 20 'Other' Species by Count"
)
other_species_ax.set_xlabel("Sample Count")
plt.tight_layout()
plt.show()

In [None]:
# Download the per-sample test predictions / error-analysis table of the target run
with tempfile.TemporaryDirectory() as download_dir:
    error_analysis_path = mlflow.artifacts.download_artifacts(
        artifact_path="data_evaluation/test_predictions_error_analysis.parquet",
        run_id=TARGET_RUN_ID,
        dst_path=download_dir,
    )
    # Read before the temporary directory is cleaned up
    error_df = pd.read_parquet(error_analysis_path)

In [None]:
# Inspect which columns the error-analysis table provides; the cells below
# check for optional ones ('error_type', 'validated_frog_names') before use.
error_df.columns

In [None]:
# Accuracy on the samples whose true class is 15 ('Other')
other_mask = error_df['actual_class'] == 15
other_total = other_mask.sum()
correct_other_mask = other_mask & error_df['is_correct']
other_correct = correct_other_mask.sum()
other_acc = other_correct / other_total
print(f"Other species accuracy: {other_acc:.3%} ({other_correct}/{other_total})")

# Breakdown of error types for those samples: absolute counts, then shares
if 'error_type' in error_df.columns:
    print("Error type breakdown for 'Other':")
    other_error_types = error_df.loc[other_mask, 'error_type']
    display(other_error_types.value_counts().to_frame('count'))
    display(other_error_types.value_counts(normalize=True).to_frame('proportion'))

In [None]:
# Break the 'Other' class down into its individual species
if 'validated_frog_names' in error_df.columns:
    other_predictions = error_df[other_mask]

    other_species_counts = other_predictions['validated_frog_names'].value_counts()
    print("Top 10 'Other' species by count:")
    display(other_species_counts.head(10))
    print("Species with only 1 sample:", (other_species_counts == 1).sum())

    # Sample count and share of correct predictions per species
    per_species = (
        other_predictions
        .groupby('validated_frog_names')['is_correct']
        .agg(['count', 'mean'])
        .rename(columns={'mean': 'accuracy'})
    )
    display(per_species.sort_values('count', ascending=False).head(10))

In [None]:
# Top confusion patterns: 'Other' samples (class 15) predicted as a target species
other_samples = error_df[error_df['actual_class'] == 15]
other_as_target = other_samples[other_samples['pred_class'] != 15]

# Group by (Other species, Confused Target), count cases and compute mean confidence
confusion_summary = (
    other_as_target
    .groupby(['validated_frog_names', 'pred_class'])
    .agg(
        cases=('id', 'count'),
        mean_confidence=('pred_class_prob', 'mean')
    )
    .reset_index()
)

# Map pred_class to species name
confusion_summary['confused_target'] = confusion_summary['pred_class'].map(class_to_species)

# Denominator for the confusion rate: ALL test samples of each 'Other' species,
# not just the misclassified ones. Counting only misclassified samples would
# make the rate a share of errors and contradict the "Cases/All" column.
total_per_other = other_samples.groupby('validated_frog_names')['id'].count()
confusion_summary['total_cases'] = confusion_summary['validated_frog_names'].map(total_per_other)
confusion_summary['confusion_rate'] = confusion_summary['cases'] / confusion_summary['total_cases']
confusion_summary['rate_cases'] = confusion_summary['cases'].astype(str) + ' / ' + confusion_summary['total_cases'].astype(str)

# Sort by number of cases (descending)
confusion_summary = confusion_summary.sort_values('cases', ascending=False)

# Display top 10 confusion patterns
display(
    confusion_summary[['validated_frog_names', 'confused_target', 'confusion_rate', 'rate_cases', 'mean_confidence']]
    .rename(columns={
        'validated_frog_names': 'Other Species',
        'confused_target': 'Confused Target',
        'confusion_rate': 'Confusion Rate',
        'rate_cases': 'Cases/All',
        'mean_confidence': 'Confidence'
    })
    .head(10)
    .style.format({'Confusion Rate': '{:.2%}', 'Confidence': '{:.3f}'})
)

In [None]:
# Target-species samples that the model predicted as 'Other' (class 15)
is_target_sample = error_df['actual_class'].ne(15)
predicted_as_other = error_df['pred_class'].eq(15)
target_as_other = error_df[is_target_sample & predicted_as_other]

# Number of test samples per target species (denominator of the confusion rate)
total_per_target = error_df[is_target_sample].groupby('actual_class')['id'].count()

# One row per target species: misclassified cases, mean confidence, derived rates
target_confusion = (
    target_as_other
    .groupby('actual_class')
    .agg(
        cases=('id', 'count'),
        mean_confidence=('pred_class_prob', 'mean'),
    )
    .reset_index()
    .assign(
        target_species=lambda df: df['actual_class'].map(class_to_species),
        total_cases=lambda df: df['actual_class'].map(total_per_target),
        confusion_rate=lambda df: df['cases'] / df['total_cases'],
        rate_cases=lambda df: df['cases'].astype(str) + ' / ' + df['total_cases'].astype(str),
    )
    .sort_values('cases', ascending=False)
)

# Show the ten most affected target species with readable column headers
display_columns = {
    'target_species': 'Target Species',
    'confusion_rate': 'Confusion Rate',
    'rate_cases': 'Cases/All',
    'mean_confidence': 'Confidence',
}
display(
    target_confusion[list(display_columns)]
    .rename(columns=display_columns)
    .head(10)
    .style.format({'Confusion Rate': '{:.2%}', 'Confidence': '{:.3f}'})
)