In [None]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
import torch

# Make the project's `src` directory importable. The relative path is resolved
# against the current working directory, so this assumes the notebook is run
# from a folder that sits next to `src` — TODO confirm.
sys.path.append(os.path.abspath('../src'))

# NOTE(review): `load_audio` is imported here but is not called in the cells
# visible in this notebook; the helpers below read files with librosa directly.
from data.audio_loader import load_audio

# Seed NumPy's global RNG and PyTorch's default generator for reproducibility.
np.random.seed(42)
torch.manual_seed(42)

# Exploratory Data Analysis (EDA)

This notebook explores the LibriSpeech dataset to understand:
1.  Audio waveforms and Mel-spectrograms.
2.  Duration distribution (to determine UAP length).
3.  Amplitude statistics (normalization verification).

In [None]:
def load_audio_sample(filepath):
    """Read an audio file with librosa, keeping the file's own sampling rate.

    Args:
        filepath: Path of the audio file passed to ``librosa.load``.

    Returns:
        The ``(samples, sample_rate)`` pair produced by ``librosa.load``, or
        ``(None, None)`` when loading raises; the error is printed, not re-raised.
    """
    try:
        # sr=None turns off resampling, so the native rate is what comes back.
        samples, sample_rate = librosa.load(filepath, sr=None)
    except Exception as err:
        print(f"Error loading {filepath}: {err}")
        return None, None
    else:
        return samples, sample_rate

### Visualize Waveforms & Mel-Spectrograms

We plot the waveform and Mel-spectrogram of a sample audio file to inspect the raw audio and its frequency representation.

In [None]:
def plot_audio_analysis(filepath, title="", n_mels=128, fmax=8000):
    """Plot the waveform and Mel-spectrogram of one audio file and print a summary.

    Args:
        filepath: Path of the audio file, loaded through ``load_audio_sample``.
        title: Label appended to both subplot titles.
        n_mels: Number of Mel bands in the spectrogram.
        fmax: Upper frequency bound (Hz) for the Mel filter bank. It is capped
            at the Nyquist frequency (``sr / 2``) of the loaded file.

    Returns:
        None. Shows a two-row figure and prints duration, sample rate and the
        minimum/maximum amplitude. Nothing is plotted when the file cannot be
        loaded or contains no samples.
    """
    y, sr = load_audio_sample(filepath)

    if y is None:
        # load_audio_sample has already reported why loading failed.
        return
    if y.size == 0:
        # min()/max() and the spectrogram are undefined for a zero-length signal.
        print(f"[INFO] {filepath} contains no audio samples. Skipping visualization.")
        return

    # Frequencies above sr / 2 cannot be represented; a larger fmax would only
    # add Mel bands with no content (e.g. 8 kHz telephone-rate audio).
    mel_fmax = min(fmax, sr / 2)

    fig, (ax_wave, ax_mel) = plt.subplots(2, 1, figsize=(12, 8))

    # Top panel: waveform
    librosa.display.waveshow(y, sr=sr, ax=ax_wave)
    ax_wave.set_title(f'Waveform: {title}')
    ax_wave.set_xlabel('Time (s)')
    ax_wave.set_ylabel('Amplitude')

    # Bottom panel: Mel-spectrogram in dB relative to its peak
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, fmax=mel_fmax)
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
    img = librosa.display.specshow(
        mel_spec_db, sr=sr, x_axis='time', y_axis='mel', fmax=mel_fmax, ax=ax_mel
    )
    fig.colorbar(img, ax=ax_mel, format='%+2.0f dB')
    ax_mel.set_title(f'Mel-Spectrogram: {title}')

    fig.tight_layout()
    plt.show()

    print(f"Duration: {librosa.get_duration(y=y, sr=sr):.2f}s | SR: {sr}Hz | Min Amp: {y.min():.3f} | Max Amp: {y.max():.3f}")

### Load Sample Audio

*(Note: You must update `sample_path` with a valid LibriSpeech or CommonVoice file path)*

In [None]:
# CONFIGURATION
# Point `sample_path` at an audio file that exists on disk before running,
# e.g. '/path/to/LibriSpeech/test-clean/116/121/116-121-0045.flac'
sample_path = 'sample_audio.flac'

# Only plot when the file is present; otherwise tell the reader how to get data.
if not os.path.exists(sample_path):
    print("[INFO] Sample audio file not found. Skipping visualization.")
    print("[INFO] Ensure you have downloaded data using src/data/download_data.py")
else:
    plot_audio_analysis(sample_path, "Sample Audio")

### Audio Duration Distribution Analysis

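We need to determine the maximum duration in our dataset to decide the UAP vector length (e.g., 30s vs. the maximum duration). Note that `analyze_duration_distribution` only inspects up to `max_samples` files (50 by default), so the reported maximum is an estimate based on that subset.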

In [None]:
def _iter_audio_files(audio_dir, extensions=('.flac', '.wav')):
    """Yield paths of audio files under ``audio_dir`` in a sorted, repeatable order."""
    for root, dirs, files in os.walk(audio_dir):
        # Sorting `dirs` in place makes os.walk descend in a deterministic order.
        dirs.sort()
        for name in sorted(files):
            # Case-insensitive so that e.g. '.FLAC' files are not silently skipped.
            if name.lower().endswith(extensions):
                yield os.path.join(root, name)


def analyze_duration_distribution(audio_dir, max_samples=50):
    """Plot a histogram of clip durations and print summary statistics.

    Files ending in ``.flac`` or ``.wav`` are searched recursively under
    ``audio_dir`` and visited in sorted order, so the same subset is analyzed
    on every run.

    Args:
        audio_dir: Root directory to search.
        max_samples: Maximum number of successfully loaded files to analyze.
            Pass ``None`` to analyze every audio file found.

    Returns:
        None. Shows the histogram and prints the count and min/max/mean
        duration, or prints a message when no audio could be loaded.
    """
    durations = []

    for path in _iter_audio_files(audio_dir):
        # Stop the whole traversal (not just the current directory) at the limit.
        if max_samples is not None and len(durations) >= max_samples:
            break
        y, sr = load_audio_sample(path)
        if y is not None:
            # Files that failed to load do not count towards max_samples.
            durations.append(librosa.get_duration(y=y, sr=sr))

    if not durations:
        print("No audio files found.")
        return

    mean_duration = np.mean(durations)
    max_duration = np.max(durations)

    plt.figure(figsize=(10, 5))
    plt.hist(durations, bins=50, alpha=0.7, color='blue', edgecolor='black')
    plt.xlabel('Duration (seconds)')
    plt.ylabel('Frequency')
    plt.title(f'Duration Distribution of {len(durations)} Samples')
    plt.axvline(mean_duration, color='r', linestyle='dashed', linewidth=1, label=f'Mean: {mean_duration:.2f}s')
    plt.axvline(max_duration, color='g', linestyle='dashed', linewidth=1, label=f'Max: {max_duration:.2f}s')
    plt.legend()
    plt.show()

    print(f"Total samples analyzed: {len(durations)}")
    print(f"Min Duration: {min(durations):.2f}s")
    print(f"Max Duration: {max_duration:.2f}s")
    print(f"Mean Duration: {mean_duration:.2f}s")

In [None]:
# CONFIGURATION: set this to the root of your LibriSpeech split
audio_directory = 'data/LibriSpeech/test-clean'

# Run the duration analysis only when the directory is actually there.
if not os.path.exists(audio_directory):
    print("[INFO] Directory not found. Cannot analyze duration distribution.")
else:
    analyze_duration_distribution(audio_directory)

### Amplitude Statistics

Verify that the audio is normalized to [-1, 1] and check for clipping by counting the samples whose absolute amplitude exceeds 1.0.

In [None]:
def check_amplitude_stats(filepath):
    """Print amplitude statistics for one audio file and warn about out-of-range samples.

    Args:
        filepath: Path of the audio file, loaded through ``load_audio_sample``.

    Returns:
        None. Prints min, max, mean, standard deviation, the number of samples
        whose absolute value exceeds 1.0, and the total sample count. Nothing
        is computed when the file cannot be loaded or contains no samples.
    """
    y, sr = load_audio_sample(filepath)
    if y is None:
        # load_audio_sample has already reported why loading failed.
        return
    if y.size == 0:
        # min()/max() raise on a zero-length array, so report and stop instead.
        print(f"[INFO] {filepath} contains no audio samples. Skipping amplitude statistics.")
        return

    stats = {
        'min': y.min(),
        'max': y.max(),
        'mean': y.mean(),
        'std': y.std(),
        # Samples strictly outside the [-1, 1] range.
        'clipped_count': np.sum(np.abs(y) > 1.0),
        'total_samples': len(y)
    }

    print("--- Amplitude Statistics ---")
    for k, v in stats.items():
        # 'clipped_count' is displayed as 'Clipped Count', etc.
        print(f"{k.replace('_', ' ').title()}: {v}")

    if stats['clipped_count'] > 0:
        print(f"[WARNING] {stats['clipped_count']} samples out of {stats['total_samples']} exceed the [-1, 1] range.")

In [None]:
# Uses the `sample_path` defined in the sample-audio configuration cell.
if not os.path.exists(sample_path):
    print("[INFO] Sample path not found.")
else:
    check_amplitude_stats(sample_path)