# 🎵 SoundLab Studio

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/wyattowalsh/soundlab/blob/main/soundlab.ipynb)
[![GitHub](https://img.shields.io/badge/GitHub-wyattowalsh/soundlab-blue?logo=github)](https://github.com/wyattowalsh/soundlab)
[![PyPI](https://img.shields.io/pypi/v/soundlab?logo=pypi&logoColor=white)](https://pypi.org/project/soundlab/)

Production-ready music processing platform with stem separation, audio-to-MIDI transcription, effects processing, audio analysis, and voice generation capabilities.

**Features:**
- 🎸 **Stem Separation** - Isolate vocals, drums, bass, and other instruments using Demucs
- 🎹 **MIDI Transcription** - Convert audio to MIDI using Spotify's Basic Pitch
- 📊 **Audio Analysis** - Extract BPM, key, loudness, and spectral features
- 🎚️ **Effects Processing** - Apply professional audio effects chains

In [None]:
# @title üîß Step 1: Installation { display-mode: "form" }
# @markdown Run this cell first to install SoundLab and its dependencies

import subprocess
import sys

def is_colab():
    """Report whether this notebook is running inside Google Colab.

    Detection relies solely on whether the ``google.colab`` package can be
    imported.

    Returns:
        ``True`` if the import succeeds, ``False`` if it raises ``ImportError``.
    """
    try:
        import google.colab  # imported only to probe availability
    except ImportError:
        return False
    else:
        return True

# Outside Colab nothing is installed: the package is assumed to be present.
# Inside Colab, install SoundLab with all extras using the pip that belongs
# to the running interpreter (check_call raises if pip exits non-zero).
if not is_colab():
    print("üíª Running locally - assuming SoundLab is already installed")
else:
    print("üì¶ Installing SoundLab...")
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "-q", "soundlab[all]"]
    )
    print("‚úÖ Installation complete!")

In [None]:
# @title üìö Step 2: Imports & Configuration { display-mode: "form" }
# @markdown Run this cell to import all required modules

import warnings
warnings.filterwarnings('ignore')

import gradio as gr
import numpy as np
from pathlib import Path
import tempfile

# SoundLab imports
from soundlab.separation import StemSeparator, SeparationConfig, DemucsModel
from soundlab.transcription import MIDITranscriber, TranscriptionConfig
from soundlab.analysis import analyze_audio
from soundlab.effects import EffectsChain
from soundlab.effects.dynamics import CompressorConfig
from soundlab.effects.eq import HighPassFilterConfig
from soundlab.effects.time_based import ReverbConfig, DelayConfig
from soundlab.utils.gpu import is_cuda_available, get_device

# Check GPU availability
# Print the value returned by get_device() and the result of
# is_cuda_available() so the user can confirm the runtime before running
# any processing cell.
device = get_device()
print(f"üñ•Ô∏è Device: {device}")
print(f"üöÄ CUDA Available: {is_cuda_available()}")

In [None]:
# @title üõ†Ô∏è Step 3: Define Processing Functions { display-mode: "form" }
# @markdown This cell defines all the audio processing functions

def separate_stems(
    audio_file,
    model_name: str = "htdemucs",
    segment_length: float = 7.8,
    overlap: float = 0.25,
    shifts: int = 1,
    progress=gr.Progress()
):
    """Split an audio file into vocals, drums, bass and "other" stems.

    Args:
        audio_file: Audio to separate, or ``None`` when nothing was uploaded.
        model_name: One of ``"htdemucs"``, ``"htdemucs_ft"``, ``"htdemucs_6s"``;
            any other value falls back to ``DemucsModel.HTDEMUCS``.
        segment_length: Forwarded to ``SeparationConfig``.
        overlap: Forwarded to ``SeparationConfig``.
        shifts: Forwarded to ``SeparationConfig``.
        progress: Gradio progress tracker used for stage updates.

    Returns:
        ``(vocals, drums, bass, other, status)``. Each stem entry is a path
        string, or ``None`` when that stem is missing. On missing input or
        any failure all four stems are ``None`` and ``status`` explains why.
    """
    if audio_file is None:
        return None, None, None, None, "‚ùå Please upload an audio file"

    def _path_or_none(stem):
        # Missing/falsy stems are reported as None rather than "None".
        return str(stem) if stem else None

    try:
        known_models = {
            "htdemucs": DemucsModel.HTDEMUCS,
            "htdemucs_ft": DemucsModel.HTDEMUCS_FT,
            "htdemucs_6s": DemucsModel.HTDEMUCS_6S,
        }
        settings = SeparationConfig(
            model=known_models.get(model_name, DemucsModel.HTDEMUCS),
            segment_length=segment_length,
            overlap=overlap,
            shifts=shifts,
        )

        progress(0.1, desc="Loading model...")
        separator = StemSeparator(settings)

        # Stems are written to a fresh temp directory that is left in place
        # so the returned paths stay readable after this function returns.
        stems_dir = Path(tempfile.mkdtemp())

        progress(0.2, desc="Separating stems...")
        result = separator.separate(audio_path=audio_file, output_dir=stems_dir)

        progress(1.0, desc="Complete!")

        vocals = _path_or_none(result.vocals)
        drums = _path_or_none(result.stems.get("drums"))
        bass = _path_or_none(result.stems.get("bass"))
        other = _path_or_none(result.stems.get("other"))

        elapsed = result.processing_time_seconds
        return vocals, drums, bass, other, f"‚úÖ Separation complete in {elapsed:.1f}s"

    except Exception as e:
        # UI boundary: surface the failure in the status box instead of raising.
        return None, None, None, None, f"‚ùå Error: {str(e)}"


def transcribe_to_midi(
    audio_file,
    onset_threshold: float = 0.5,
    frame_threshold: float = 0.3,
    minimum_note_length: float = 0.058,
    minimum_frequency: float = 32.7,
    maximum_frequency: float = 2093.0,
    progress=gr.Progress()
):
    """Convert an audio file to MIDI and, when available, a piano-roll image.

    The five numeric arguments are forwarded unchanged to
    ``TranscriptionConfig``. Output goes to ``transcription.mid`` (and
    ``piano_roll.png``) inside a fresh temporary directory.

    Returns:
        ``(midi_path, piano_roll_path, status)``. Both paths are ``None``
        when no file was supplied or transcription failed; the piano-roll
        path is also ``None`` when no image file was produced.
    """
    if audio_file is None:
        return None, None, "‚ùå Please upload an audio file"

    try:
        settings = TranscriptionConfig(
            onset_threshold=onset_threshold,
            frame_threshold=frame_threshold,
            minimum_note_length=minimum_note_length,
            minimum_frequency=minimum_frequency,
            maximum_frequency=maximum_frequency,
        )

        progress(0.2, desc="Initializing transcriber...")
        transcriber = MIDITranscriber(settings)

        work_dir = Path(tempfile.mkdtemp())
        midi_file = work_dir / "transcription.mid"
        roll_image = work_dir / "piano_roll.png"

        progress(0.4, desc="Transcribing audio...")
        outcome = transcriber.transcribe(audio_path=audio_file, output_path=midi_file)

        progress(0.8, desc="Generating piano roll...")
        # Piano-roll export is treated as optional on the result object.
        if hasattr(outcome, 'save_piano_roll'):
            outcome.save_piano_roll(str(roll_image))

        progress(1.0, desc="Complete!")

        if hasattr(outcome, 'notes'):
            note_count = len(outcome.notes)
        else:
            note_count = 0
        status = f"‚úÖ Transcribed {note_count} notes"

        # Only hand back the image path if the file was actually written.
        if roll_image.exists():
            piano_roll = str(roll_image)
        else:
            piano_roll = None
        return str(midi_file), piano_roll, status

    except Exception as e:
        # UI boundary: surface the failure in the status box instead of raising.
        return None, None, f"‚ùå Error: {str(e)}"


def analyze_audio_file(audio_file, progress=gr.Progress()):
    """Run ``analyze_audio`` on a file and format the results as Markdown.

    Args:
        audio_file: Audio to analyze, passed straight to ``analyze_audio``;
            ``None`` when nothing was uploaded.
        progress: Gradio progress tracker used for stage updates.

    Returns:
        A 6-tuple ``(tempo, key, loudness, spectral, onsets, status)`` of
        strings. On missing input or any failure the five report strings
        are empty and ``status`` carries the message.
    """
    if audio_file is None:
        return "", "", "", "", "", "‚ùå Please upload an audio file"
    
    try:
        progress(0.2, desc="Loading audio...")
        result = analyze_audio(audio_file)
        
        progress(0.6, desc="Extracting features...")
        
        tempo_info = f"""**Tempo Analysis**
- BPM: {result.tempo.bpm:.1f}
- Confidence: {result.tempo.confidence:.2%}"""
        
        key_info = f"""**Key Detection**
- Key: {result.key.name}
- Camelot: {result.key.camelot}
- Confidence: {result.key.confidence:.2%}"""
        
        loudness_info = f"""**Loudness Analysis**
- Integrated LUFS: {result.loudness.lufs:.1f}
- True Peak: {result.loudness.true_peak_dbfs:.1f} dBFS
- Dynamic Range: {result.loudness.dynamic_range:.1f} dB"""
        
        spectral_info = f"""**Spectral Features**
- Centroid: {result.spectral.centroid_mean:.1f} Hz
- Brightness: {result.spectral.brightness}"""
        
        # Test emptiness with len() instead of truthiness: if the timestamps
        # are a NumPy array, a bare `if timestamps` raises "truth value of an
        # array ... is ambiguous" and the whole report would be lost to the
        # except branch. Lists behave exactly as before.
        timestamps = result.onsets.timestamps
        if timestamps is not None and len(timestamps) > 0:
            onset_info = f"""**Onset Detection**
- Onsets Detected: {len(timestamps)}
- Average Interval: {result.onsets.average_interval:.3f}s"""
        else:
            onset_info = "No onsets detected"
        
        progress(1.0, desc="Complete!")
        return tempo_info, key_info, loudness_info, spectral_info, onset_info, "‚úÖ Analysis complete!"
        
    except Exception as e:
        # UI boundary: surface the failure in the status box instead of raising.
        return "", "", "", "", "", f"‚ùå Error: {str(e)}"


def apply_effects(
    audio_file,
    hp_enabled: bool = True,
    hp_cutoff: float = 80.0,
    comp_enabled: bool = True,
    comp_threshold: float = -20.0,
    comp_ratio: float = 4.0,
    comp_attack: float = 5.0,
    comp_release: float = 100.0,
    reverb_enabled: bool = False,
    reverb_room_size: float = 0.5,
    reverb_damping: float = 0.5,
    reverb_wet: float = 0.3,
    delay_enabled: bool = False,
    delay_time: float = 0.25,
    delay_feedback: float = 0.3,
    delay_mix: float = 0.3,
    progress=gr.Progress()
):
    """Run the audio through whichever effects are switched on.

    Enabled effects are added to the chain in a fixed order: high-pass
    filter, compressor, reverb, delay. The processed audio is written to
    ``processed.wav`` inside a fresh temporary directory.

    Returns:
        ``(output_path, status)``. ``output_path`` is ``None`` for missing
        input or on failure, and is the unmodified input path when no
        effect is enabled.
    """
    if audio_file is None:
        return None, "‚ùå Please upload an audio file"

    try:
        progress(0.2, desc="Building effects chain...")

        fx_chain = EffectsChain()
        labels = []  # human-readable summary of each effect that was added

        if hp_enabled:
            high_pass = HighPassFilterConfig(cutoff_frequency_hz=hp_cutoff)
            fx_chain.add(high_pass)
            labels.append(f"High Pass ({hp_cutoff}Hz)")

        if comp_enabled:
            compressor = CompressorConfig(
                threshold_db=comp_threshold,
                ratio=comp_ratio,
                attack_ms=comp_attack,
                release_ms=comp_release,
            )
            fx_chain.add(compressor)
            labels.append(f"Compressor ({comp_ratio}:1)")

        if reverb_enabled:
            # Dry level is the complement of wet, so the two always sum to 1.
            reverb = ReverbConfig(
                room_size=reverb_room_size,
                damping=reverb_damping,
                wet_level=reverb_wet,
                dry_level=1.0 - reverb_wet,
            )
            fx_chain.add(reverb)
            labels.append(f"Reverb ({reverb_room_size:.0%} room)")

        if delay_enabled:
            delay = DelayConfig(
                delay_seconds=delay_time,
                feedback=delay_feedback,
                mix=delay_mix,
            )
            fx_chain.add(delay)
            labels.append(f"Delay ({delay_time}s)")

        # Nothing to do: hand the input straight back with a warning.
        if len(labels) == 0:
            return audio_file, "‚ö†Ô∏è No effects enabled"

        progress(0.5, desc="Processing audio...")

        rendered = Path(tempfile.mkdtemp()) / "processed.wav"
        fx_chain.process(input_path=audio_file, output_path=rendered)

        progress(1.0, desc="Complete!")
        summary = ', '.join(labels)
        return str(rendered), f"‚úÖ Applied: {summary}"

    except Exception as e:
        # UI boundary: surface the failure in the status box instead of raising.
        return None, f"‚ùå Error: {str(e)}"

# Printed when this cell finishes, i.e. after every function above is defined.
print("‚úÖ All processing functions defined!")

---
## 🎛️ Step 4: Launch SoundLab Studio

Run the cell below to launch the interactive interface with all audio processing features.

In [None]:
# @title üéµ SoundLab Studio - Interactive Interface { display-mode: "form" }
# @markdown Launch the complete SoundLab Studio interface with all features

# Build the SoundLab Studio UI: one tab per feature, each wiring a button to
# the matching processing function defined earlier in this notebook
# (separate_stems, transcribe_to_midi, analyze_audio_file, apply_effects).
# In every .click() call the `inputs` list is in the same order as the
# handler's parameters (after which only `progress` remains) and the
# `outputs` list is in the same order as the handler's returned tuple.
with gr.Blocks(title="SoundLab Studio", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # üéµ SoundLab Studio
    
    Production-ready music processing platform. Select a tab below to access different features.
    """)
    
    with gr.Tabs():
        # Stem Separation Tab
        with gr.Tab("üé∏ Stem Separation"):
            gr.Markdown("Separate audio into vocals, drums, bass, and other instruments using Demucs.")
            with gr.Row():
                # Left column: upload and separation settings.
                with gr.Column():
                    # type="filepath": the handler receives a path on disk
                    # (per Gradio's Audio docs) rather than raw samples.
                    sep_audio = gr.Audio(label="Upload Audio", type="filepath")
                    # Choices match the keys of separate_stems' model map.
                    sep_model = gr.Dropdown(["htdemucs", "htdemucs_ft", "htdemucs_6s"], value="htdemucs", label="Model")
                    with gr.Row():
                        sep_segment = gr.Slider(1.0, 15.0, value=7.8, label="Segment Length")
                        sep_overlap = gr.Slider(0.1, 0.5, value=0.25, label="Overlap")
                    sep_shifts = gr.Slider(0, 5, value=1, step=1, label="Random Shifts")
                    sep_btn = gr.Button("üéµ Separate", variant="primary")
                # Right column: one player per returned stem plus a status line.
                with gr.Column():
                    sep_vocals = gr.Audio(label="üé§ Vocals")
                    sep_drums = gr.Audio(label="ü•Å Drums")
                    sep_bass = gr.Audio(label="üé∏ Bass")
                    sep_other = gr.Audio(label="üéπ Other")
                    sep_status = gr.Textbox(label="Status")
            
            sep_btn.click(
                fn=separate_stems,
                inputs=[sep_audio, sep_model, sep_segment, sep_overlap, sep_shifts],
                outputs=[sep_vocals, sep_drums, sep_bass, sep_other, sep_status]
            )
        
        # Transcription Tab
        with gr.Tab("üéπ MIDI Transcription"):
            gr.Markdown("Convert polyphonic audio to MIDI using Basic Pitch.")
            with gr.Row():
                with gr.Column():
                    trans_audio = gr.Audio(label="Upload Audio", type="filepath")
                    with gr.Row():
                        trans_onset = gr.Slider(0.1, 0.9, value=0.5, label="Onset Threshold")
                        trans_frame = gr.Slider(0.1, 0.9, value=0.3, label="Frame Threshold")
                    trans_min_note = gr.Slider(0.01, 0.2, value=0.058, label="Min Note Length (s)")
                    # The two ranges meet at 500 Hz, so the minimum can never
                    # exceed the maximum (they can be equal at 500).
                    with gr.Row():
                        trans_min_freq = gr.Slider(20, 500, value=32.7, label="Min Frequency (Hz)")
                        trans_max_freq = gr.Slider(500, 8000, value=2093.0, label="Max Frequency (Hz)")
                    trans_btn = gr.Button("üéµ Transcribe", variant="primary")
                with gr.Column():
                    trans_midi = gr.File(label="üìÑ MIDI File")
                    trans_piano = gr.Image(label="üéπ Piano Roll")
                    trans_status = gr.Textbox(label="Status")
            
            trans_btn.click(
                fn=transcribe_to_midi,
                inputs=[trans_audio, trans_onset, trans_frame, trans_min_note, trans_min_freq, trans_max_freq],
                outputs=[trans_midi, trans_piano, trans_status]
            )
        
        # Analysis Tab
        with gr.Tab("üìä Audio Analysis"):
            gr.Markdown("Extract BPM, key, loudness, and spectral features.")
            with gr.Row():
                with gr.Column(scale=1):
                    ana_audio = gr.Audio(label="Upload Audio", type="filepath")
                    ana_btn = gr.Button("üìä Analyze", variant="primary")
                    ana_status = gr.Textbox(label="Status")
                # Results column: each Markdown component receives one of the
                # report strings built by analyze_audio_file.
                with gr.Column(scale=2):
                    with gr.Row():
                        ana_tempo = gr.Markdown(label="Tempo")
                        ana_key = gr.Markdown(label="Key")
                    with gr.Row():
                        ana_loud = gr.Markdown(label="Loudness")
                        ana_spectral = gr.Markdown(label="Spectral")
                    ana_onset = gr.Markdown(label="Onsets")
            
            ana_btn.click(
                fn=analyze_audio_file,
                inputs=[ana_audio],
                outputs=[ana_tempo, ana_key, ana_loud, ana_spectral, ana_onset, ana_status]
            )
        
        # Effects Tab
        with gr.Tab("üéöÔ∏è Effects Chain"):
            gr.Markdown("Apply professional audio effects.")
            with gr.Row():
                with gr.Column():
                    fx_audio = gr.Audio(label="Upload Audio", type="filepath")
                    # Control defaults below equal apply_effects' parameter
                    # defaults: high pass and compressor on, reverb and delay off.
                    with gr.Accordion("üîä High Pass", open=True):
                        fx_hp_en = gr.Checkbox(label="Enable", value=True)
                        fx_hp_cut = gr.Slider(20, 500, value=80, label="Cutoff (Hz)")
                    with gr.Accordion("üìà Compressor", open=True):
                        fx_comp_en = gr.Checkbox(label="Enable", value=True)
                        fx_comp_th = gr.Slider(-60, 0, value=-20, label="Threshold")
                        fx_comp_ratio = gr.Slider(1, 20, value=4, label="Ratio")
                        with gr.Row():
                            fx_comp_att = gr.Slider(0.1, 100, value=5, label="Attack")
                            fx_comp_rel = gr.Slider(10, 1000, value=100, label="Release")
                    with gr.Accordion("üåä Reverb", open=False):
                        fx_rev_en = gr.Checkbox(label="Enable", value=False)
                        fx_rev_room = gr.Slider(0, 1, value=0.5, label="Room Size")
                        fx_rev_damp = gr.Slider(0, 1, value=0.5, label="Damping")
                        fx_rev_wet = gr.Slider(0, 1, value=0.3, label="Wet")
                    with gr.Accordion("‚è±Ô∏è Delay", open=False):
                        fx_del_en = gr.Checkbox(label="Enable", value=False)
                        fx_del_time = gr.Slider(0.01, 1, value=0.25, label="Time (s)")
                        fx_del_fb = gr.Slider(0, 0.95, value=0.3, label="Feedback")
                        fx_del_mix = gr.Slider(0, 1, value=0.3, label="Mix")
                    fx_btn = gr.Button("üéµ Process", variant="primary")
                with gr.Column():
                    fx_output = gr.Audio(label="Processed Audio")
                    fx_status = gr.Textbox(label="Status")
            
            fx_btn.click(
                fn=apply_effects,
                inputs=[fx_audio, fx_hp_en, fx_hp_cut, fx_comp_en, fx_comp_th, fx_comp_ratio,
                       fx_comp_att, fx_comp_rel, fx_rev_en, fx_rev_room, fx_rev_damp, fx_rev_wet,
                       fx_del_en, fx_del_time, fx_del_fb, fx_del_mix],
                outputs=[fx_output, fx_status]
            )
    
    # Footer with project links.
    gr.Markdown("""
    ---
    **SoundLab** | [GitHub](https://github.com/wyattowalsh/soundlab) | [PyPI](https://pypi.org/project/soundlab/) | MIT License
    """)

# Launch the interface
# Per Gradio's launch() docs, share=True requests a public share link and
# debug=True keeps the cell attached to the running server.
# NOTE(review): anyone holding the share link can reach this app — confirm
# that is intended whenever this runs outside a throwaway Colab session.
demo.launch(share=True, debug=True)