# 🔍 Audio Descriptors Explorer

This notebook allows you to upload an audio file and explore various **Audio Descriptors** as described in the `on_audio_descriptors.md` guide.

You can visualize the Waveform, Spectrogram, and select specific features to analyze.

## 🚀 Instructions
1. Run the **Environment Setup** cell.
2. Run the **Import Libraries** cell.
3. Run the **Explorer App** cell.

In [None]:
# Detects if running on Colab and installs dependencies.
# Defines IN_COLAB (bool), which the Explorer App cell uses to decide
# whether to request a public share link.
import subprocess
import sys

try:
    # Imported only as a probe: the package exists solely on Google Colab.
    import google.colab  # noqa: F401
except ImportError:
    IN_COLAB = False
    print("💻 Running locally (using Pixi environment)")
else:
    # The install logic lives in `else` so that an ImportError raised while
    # installing can never be mistaken for "not running on Colab".
    IN_COLAB = True
    print("🌐 Running on Google Colab")

    # Repository URL
    REPO_URL = "https://raw.githubusercontent.com/zepadovani/2025_FU_workshop/main"
    REQUIREMENTS_URL = f"{REPO_URL}/requirements.txt"

    print("\n📦 Installing dependencies...")

    # Tries to download and install from requirements.txt
    try:
        subprocess.run(["wget", "-q", REQUIREMENTS_URL, "-O", "requirements.txt"], check=True)
        subprocess.run([sys.executable, "-m", "pip", "install", "-q", "-r", "requirements.txt"], check=True)
        print("✅ Dependencies installed from requirements.txt")
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: wget or pip exited with a non-zero status.
        # OSError: the executable could not be started (e.g. wget missing).
        # Anything else (including KeyboardInterrupt) is allowed to propagate.
        # Fallback: manual installation
        print("⚠️  Installing packages manually...")
        packages = ["librosa", "soundfile", "gradio", "matplotlib"]
        for pkg in packages:
            subprocess.run([sys.executable, "-m", "pip", "install", "-q", pkg], check=True)
            print(f"  ✓ {pkg}")
        print("✅ Installation complete")

In [None]:
import gradio as gr
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import soundfile as sf

print("✅ Libraries imported successfully!")

In [None]:
def analyze_audio(audio_path, descriptor_choice):
    """Plot the waveform, spectrogram and one selected descriptor of a file.

    Args:
        audio_path: Path of the audio file to analyze, or None when no file
            has been provided.
        descriptor_choice: Label of the descriptor to visualize; expected to
            be one of the strings compared against in the branches below.

    Returns:
        A 4-tuple ``(waveform_figure, spectrogram_figure, descriptor_figure,
        info_text)``. The three figures are None when there is nothing to
        analyze (no path given, or the file holds no samples).
    """
    if audio_path is None:
        return None, None, None, "Please upload an audio file."

    # sr=None keeps the file's native sampling rate (no resampling). Because
    # of that, every frame-to-time conversion below must receive this `sr`
    # explicitly; librosa would otherwise assume its 22050 Hz default.
    y, sr = librosa.load(audio_path, sr=None)

    if y.size == 0:
        return None, None, None, "The audio file contains no samples."

    # 1. Waveform Plot
    fig_wave, ax_wave = plt.subplots(figsize=(10, 3))
    librosa.display.waveshow(y, sr=sr, ax=ax_wave, color="blue")
    ax_wave.set_title("Waveform")
    fig_wave.tight_layout()

    # 2. Spectrogram Plot
    D = librosa.stft(y)
    S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)

    fig_spec, ax_spec = plt.subplots(figsize=(10, 4))
    img = librosa.display.specshow(S_db, x_axis='time', y_axis='log', sr=sr, ax=ax_spec, cmap='magma')
    fig_spec.colorbar(img, ax=ax_spec, format='%+2.0f dB')
    ax_spec.set_title("Spectrogram (Log Frequency)")
    fig_spec.tight_layout()

    # 3. Descriptor Analysis
    fig_desc, ax_desc = plt.subplots(figsize=(10, 4))
    info_text = ""

    try:
        if descriptor_choice == "Spectral Centroid (Brightness)":
            cent = librosa.feature.spectral_centroid(y=y, sr=sr)
            times = librosa.times_like(cent, sr=sr)
            ax_desc.semilogy(times, cent[0], label='Spectral Centroid', color='w')
            librosa.display.specshow(S_db, x_axis='time', y_axis='log', sr=sr, ax=ax_desc, cmap='magma')
            ax_desc.set_title("Spectral Centroid over Spectrogram")
            info_text = f"Mean Centroid: {np.mean(cent):.2f} Hz"

        elif descriptor_choice == "Spectral Rolloff (Shape)":
            rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
            times = librosa.times_like(rolloff, sr=sr)
            ax_desc.semilogy(times, rolloff[0], label='Spectral Rolloff', color='w')
            librosa.display.specshow(S_db, x_axis='time', y_axis='log', sr=sr, ax=ax_desc, cmap='magma')
            ax_desc.set_title("Spectral Rolloff over Spectrogram")
            info_text = f"Mean Rolloff: {np.mean(rolloff):.2f} Hz"

        elif descriptor_choice == "MFCCs (Timbre)":
            mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
            img = librosa.display.specshow(mfccs, x_axis='time', sr=sr, ax=ax_desc, cmap='coolwarm')
            fig_desc.colorbar(img, ax=ax_desc)
            ax_desc.set_title("MFCCs")
            info_text = f"MFCC Shape: {mfccs.shape}"

        elif descriptor_choice == "Spectral Flatness (Noisiness)":
            flatness = librosa.feature.spectral_flatness(y=y)
            times = librosa.times_like(flatness, sr=sr)
            ax_desc.plot(times, flatness[0], label='Flatness', color='green')
            ax_desc.set_title("Spectral Flatness")
            ax_desc.set_ylim(0, 1)
            info_text = f"Mean Flatness: {np.mean(flatness):.4f} (0=Tone, 1=Noise)"

        elif descriptor_choice == "RMS (Loudness)":
            rms = librosa.feature.rms(y=y)
            times = librosa.times_like(rms, sr=sr)
            ax_desc.plot(times, rms[0], label='RMS', color='red')
            ax_desc.set_title("RMS Energy (Loudness)")
            info_text = f"Mean RMS: {np.mean(rms):.4f}"

        elif descriptor_choice == "Zero Crossing Rate (Roughness)":
            zcr = librosa.feature.zero_crossing_rate(y)
            times = librosa.times_like(zcr, sr=sr)
            ax_desc.plot(times, zcr[0], label='ZCR', color='orange')
            ax_desc.set_title("Zero Crossing Rate")
            info_text = f"Mean ZCR: {np.mean(zcr):.4f}"

        elif descriptor_choice == "Chroma (Harmony)":
            chroma = librosa.feature.chroma_stft(y=y, sr=sr)
            img = librosa.display.specshow(chroma, y_axis='chroma', x_axis='time', sr=sr, ax=ax_desc, cmap='coolwarm')
            fig_desc.colorbar(img, ax=ax_desc)
            ax_desc.set_title("Chromagram")
            info_text = "Displays energy in each pitch class (C, C#, D...)"

        elif descriptor_choice == "CQT (Constant-Q Transform)":
            C = librosa.cqt(y=y, sr=sr)
            C_db = librosa.amplitude_to_db(np.abs(C), ref=np.max)
            img = librosa.display.specshow(C_db, sr=sr, x_axis='time', y_axis='cqt_note', ax=ax_desc, cmap='magma')
            fig_desc.colorbar(img, ax=ax_desc, format='%+2.0f dB')
            ax_desc.set_title("Constant-Q Transform (Log Scale)")
            info_text = "Logarithmic frequency scale (matches musical notes)"

        elif descriptor_choice == "Spectral Flux (Onset Strength)":
            onset_env = librosa.onset.onset_strength(y=y, sr=sr)
            times = librosa.times_like(onset_env, sr=sr)
            ax_desc.plot(times, onset_env, label='Onset Strength', color='purple')
            ax_desc.set_title("Spectral Flux / Onset Strength")
            info_text = "Peaks indicate sudden changes (onsets)"

        elif descriptor_choice == "Onset Detection (Segmentation)":
            onset_frames = librosa.onset.onset_detect(y=y, sr=sr)
            onset_times = librosa.frames_to_time(onset_frames, sr=sr)
            librosa.display.waveshow(y, sr=sr, ax=ax_desc, color="blue", alpha=0.6)
            ax_desc.vlines(onset_times, -1, 1, color='r', linestyle='--', label='Onsets')
            ax_desc.set_title("Waveform with Detected Onsets")
            info_text = f"Detected {len(onset_times)} onsets"

        else:
            # Report an unrecognized label instead of returning a blank plot
            # with no explanation.
            info_text = f"Unknown descriptor: {descriptor_choice}"

    except Exception as e:
        # Deliberately broad: a failing descriptor should still let the
        # waveform and spectrogram be returned, with the error shown as text.
        info_text = f"Error calculating descriptor: {str(e)}"

    fig_desc.tight_layout()

    # Unregister the figures from pyplot so repeated analyses do not pile up
    # open figures; the Figure objects themselves are still returned.
    for fig in (fig_wave, fig_spec, fig_desc):
        plt.close(fig)

    return fig_wave, fig_spec, fig_desc, info_text

# Define Interface
# Dropdown labels. analyze_audio selects its branch by comparing against
# these exact strings, so the two must be kept in sync.
descriptors = [
    "Spectral Centroid (Brightness)",
    "Spectral Rolloff (Shape)",
    "MFCCs (Timbre)",
    "Spectral Flatness (Noisiness)",
    "RMS (Loudness)",
    "Zero Crossing Rate (Roughness)",
    "Chroma (Harmony)",
    "CQT (Constant-Q Transform)",
    "Spectral Flux (Onset Strength)",
    "Onset Detection (Segmentation)",
]

with gr.Blocks(title="Audio Descriptors Explorer") as demo:
    gr.Markdown("# 🔍 Audio Descriptors Explorer")
    gr.Markdown("Upload an audio file and select a descriptor to visualize.")

    with gr.Row():
        # Left column: inputs and the textual summary.
        with gr.Column(scale=1):
            audio_input = gr.Audio(label="Upload Audio", type="filepath")
            descriptor_dropdown = gr.Dropdown(choices=descriptors, value=descriptors[0], label="Select Descriptor")
            analyze_btn = gr.Button("Analyze", variant="primary")
            info_output = gr.Textbox(label="Analysis Info")

        # Right column: the three plots returned by analyze_audio.
        with gr.Column(scale=2):
            plot_wave = gr.Plot(label="Waveform")
            plot_spec = gr.Plot(label="Spectrogram")
            plot_desc = gr.Plot(label="Descriptor Visualization")

    # The order of `outputs` must match the order of analyze_audio's
    # return tuple: (waveform, spectrogram, descriptor plot, info text).
    analyze_btn.click(
        fn=analyze_audio,
        inputs=[audio_input, descriptor_dropdown],
        outputs=[plot_wave, plot_spec, plot_desc, info_output]
    )

# IN_COLAB is defined by the Environment Setup cell. If that cell has not
# been run in this kernel session, fall back to share=False rather than
# failing with a NameError.
demo.launch(share=globals().get("IN_COLAB", False))