In [1]:
# Notebook 3.5 - Extract Mel Spectrograms for Unknown Class

# Cell 1: Import packages
import os
import numpy as np
import pandas as pd
import librosa
import tensorflow as tf
from tqdm import tqdm

# Cell 2: Set path variables
# Root of the mel-spectrogram feature tree (also where metadata.csv is read/written).
feature_output_dir = '../data/features/mel'
# Root of the processed audio; the 'unknown' class lives in its own sub-folder.
processed_dir = '../data/processed'
unknown_dir = os.path.join(processed_dir, 'unknown')

# Cell 3: Define Mel spectrogram extraction function
def extract_mel_spectrogram(audio_path, n_mels=128, n_fft=2048, hop_length=128,
                            target_shape=(128, 32)):
    """Load an audio file and return a fixed-size, normalized log-mel spectrogram.

    Args:
        audio_path: Path to the audio file, passed to ``librosa.load``.
        n_mels: Number of mel bands.
        n_fft: FFT window size.
        hop_length: Hop between successive frames, in samples.
        target_shape: ``(height, width)`` the spectrogram is resized to.
            Defaults to ``(128, 32)``, the size this function always produced.

    Returns:
        A 2-D NumPy array of shape ``target_shape``. Values are min-max scaled
        per file before resizing; a spectrogram with no dynamic range (e.g. a
        silent clip) yields all zeros instead of NaN.
    """
    # sr=None keeps the file's native sampling rate instead of resampling.
    y, sr = librosa.load(audio_path, sr=None)

    # Power mel spectrogram covering 20 Hz up to the Nyquist frequency.
    mel_spec = librosa.feature.melspectrogram(
        y=y,
        sr=sr,
        n_mels=n_mels,
        n_fft=n_fft,
        hop_length=hop_length,
        fmin=20,
        fmax=sr/2,
        power=2.0
    )

    # Convert to log scale (dB), relative to the loudest bin of this file.
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

    # Min-max normalize per file. A constant spectrogram has zero range, and
    # dividing by it would silently fill the saved feature with NaN, so that
    # case maps to all zeros instead.
    db_min = mel_spec_db.min()
    db_range = mel_spec_db.max() - db_min
    if db_range > 0:
        mel_spec_norm = (mel_spec_db - db_min) / db_range
    else:
        mel_spec_norm = np.zeros_like(mel_spec_db)

    # tf.image.resize needs a channel axis: add one, resize, then drop it again.
    resized = tf.image.resize(mel_spec_norm[..., np.newaxis], target_shape)
    return resized.numpy()[..., 0]

# Cell 4: Get list of unknown audio files
# os.listdir returns entries in arbitrary order, so the list is sorted to keep
# the processing order (and the resulting metadata rows) reproducible between
# runs. The extension check is case-insensitive so a file such as 'clip.WAV'
# is not silently skipped.
audio_files = sorted(
    os.path.join(unknown_dir, fname)
    for fname in os.listdir(unknown_dir)
    if fname.lower().endswith('.wav')
)

# Cell 5: Process and extract features
metadata = []

progress = tqdm(audio_files, desc='Extracting Mel spectrograms for unknown')
for audio_path in progress:
    # Mirror the file's location under processed_dir inside the feature tree.
    rel_path = os.path.relpath(audio_path, processed_dir)
    feature_dir = os.path.join(feature_output_dir, os.path.dirname(rel_path))
    os.makedirs(feature_dir, exist_ok=True)

    # Compute the spectrogram and store it as <audio file stem>.npy
    mel_features = extract_mel_spectrogram(audio_path)
    stem, _ext = os.path.splitext(os.path.basename(audio_path))
    feature_path = os.path.join(feature_dir, stem + '.npy')
    np.save(feature_path, mel_features)

    # One metadata row per file; both paths are stored relative to their roots.
    record = {
        'audio_path': rel_path,
        'feature_path': os.path.relpath(feature_path, feature_output_dir),
        'mel_shape': mel_features.shape,
        'duration': librosa.get_duration(path=audio_path),
        'sr': librosa.get_samplerate(audio_path),
    }
    metadata.append(record)

# Cell 6: Append to existing metadata.csv
# The merge is idempotent: rows are de-duplicated on 'audio_path' (latest run
# wins), so re-running the notebook no longer appends a second copy of every
# row. A missing metadata.csv is treated as "no existing rows" instead of
# raising FileNotFoundError.
metadata_csv_path = os.path.join(feature_output_dir, 'metadata.csv')
metadata_df = pd.DataFrame(metadata)

if os.path.exists(metadata_csv_path):
    existing_metadata = pd.read_csv(metadata_csv_path)
else:
    existing_metadata = pd.DataFrame()

# Concatenate only non-empty frames so an empty side cannot disturb dtypes.
frames = [frame for frame in (existing_metadata, metadata_df) if not frame.empty]
combined_metadata = pd.concat(frames, ignore_index=True) if frames else existing_metadata

if 'audio_path' in combined_metadata.columns:
    combined_metadata = combined_metadata.drop_duplicates(
        subset='audio_path', keep='last'
    ).reset_index(drop=True)

# Skip the write when there are no columns at all: a header-less CSV would
# make the next pd.read_csv call fail.
if len(combined_metadata.columns) > 0:
    os.makedirs(feature_output_dir, exist_ok=True)
    combined_metadata.to_csv(metadata_csv_path, index=False)


2025-06-03 20:41:57.313228: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-03 20:41:57.322980: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748958117.334364   26107 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748958117.337574   26107 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1748958117.346325   26107 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 