## Notebook 3: Download and Prepare ESC-50 Dataset (Background Noise)

This notebook downloads the [ESC-50 dataset](https://github.com/karolpiczak/ESC-50) from GitHub for use as background noise in data augmentation.

**Steps:**
1.  **Clone Repository**: Use `git` to clone the repository containing the audio files.
2.  **Prepare Augmentation Function**: A function is provided to mix these noise files with a clean speech dataset to create a more robust training set.

In [None]:
import os
import shutil
import subprocess

# 1. Clone GitHub Repository
# Fetch the ESC-50 repository into `clone_path` unless something already
# exists at that location.
print('Cloning ESC-50 dataset from GitHub...')
repo_url = 'https://github.com/karolpiczak/ESC-50.git'
clone_path = './datasets/ESC-50'

if os.path.exists(clone_path):
    print('Repository already exists, skipping clone.')
else:
    # Make sure the parent directory exists before handing the path to git.
    os.makedirs(os.path.dirname(clone_path), exist_ok=True)
    # Pass an argument list (no shell) and use check=True so a failed clone
    # raises CalledProcessError instead of being reported as a success below.
    subprocess.run(['git', 'clone', repo_url, clone_path], check=True)

print(f'Dataset successfully downloaded to: {clone_path}')

### 2. Data Augmentation Function

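This function takes a directory of clean speech files (organized as one subfolder per keyword) and a directory of noise `.wav` files, and mixes each speech file with a randomly chosen noise clip at a specified Signal-to-Noise Ratio (SNR) to generate augmented training data. Note that any existing output directory is deleted and recreated.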

In [None]:
import librosa
import numpy as np
import soundfile as sf
import random

def _random_noise_segment(noise, length):
    """Return a random `length`-sample slice of `noise`, tiling it first if it is too short.

    `noise` must be non-empty.
    """
    if len(noise) < length:
        repeats = -(-length // len(noise))  # ceiling division
        noise = np.tile(noise, repeats)
    start = random.randint(0, len(noise) - length)
    return noise[start:start + length]


def _mix_at_snr(speech, noise, snr_db):
    """Return `speech` plus `noise` scaled to `snr_db`, or None if either signal has no power."""
    speech_power = np.mean(speech**2)
    noise_power = np.mean(noise**2)
    # A silent signal would make the scale factor 0, inf or NaN; report it
    # to the caller instead of producing a corrupt mix.
    if not (speech_power > 0 and noise_power > 0):
        return None

    snr = 10**(snr_db / 10)
    scale = np.sqrt(speech_power / (snr * noise_power))
    mixed = speech + noise * scale

    # If the sum exceeds full scale, apply a common gain so the samples are
    # not clipped when written out; a common gain leaves the SNR unchanged.
    peak = np.max(np.abs(mixed))
    if peak > 1.0:
        mixed = mixed / peak
    return mixed


def augment_with_noise(clean_audio_dir, noise_audio_dir, output_dir, snr_db=10):
    """
    Augments a clean audio dataset by mixing it with random noise files.

    Every `.wav` file found in each immediate subfolder of `clean_audio_dir`
    is loaded at 16 kHz, mixed with a random excerpt of a randomly chosen
    noise file, and written to `output_dir/<subfolder>/noisy_<filename>`.
    An existing `output_dir` is deleted and recreated, but only after the
    inputs have been validated.

    If a speech or noise clip is empty or silent, no valid SNR scaling
    exists; a warning is printed and the speech is saved unmodified.

    Args:
        clean_audio_dir (str): Path to the directory with clean audio folders.
        noise_audio_dir (str): Path to the directory with noise .wav files.
        output_dir (str): Path to save the augmented audio.
        snr_db (int): Desired Signal-to-Noise Ratio in dB.

    Raises:
        ValueError: If `output_dir` is the same directory as one of the
            input directories (it would be deleted before being read).
    """
    # Refuse to wipe an input directory.
    output_real = os.path.realpath(output_dir)
    input_reals = (os.path.realpath(clean_audio_dir), os.path.realpath(noise_audio_dir))
    if output_real in input_reals:
        raise ValueError(
            f"output_dir must differ from the input directories: {output_dir}"
        )

    # Validate the noise source before touching any existing output.
    noise_files = [f for f in os.listdir(noise_audio_dir) if f.endswith('.wav')]
    if not noise_files:
        print(f"Error: No noise files found in {noise_audio_dir}")
        return

    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    print(f'Starting augmentation with SNR = {snr_db} dB...')
    for keyword_folder in os.listdir(clean_audio_dir):
        keyword_path = os.path.join(clean_audio_dir, keyword_folder)
        if not os.path.isdir(keyword_path):
            continue

        output_keyword_path = os.path.join(output_dir, keyword_folder)
        os.makedirs(output_keyword_path, exist_ok=True)

        for clean_file in os.listdir(keyword_path):
            if not clean_file.endswith('.wav'):
                continue

            # Load clean audio
            clean_path = os.path.join(keyword_path, clean_file)
            speech, sr = librosa.load(clean_path, sr=16000)

            # Load random noise at the same sample rate
            noise_file = random.choice(noise_files)
            noise_path = os.path.join(noise_audio_dir, noise_file)
            noise, _ = librosa.load(noise_path, sr=sr)

            # Mix only when both clips contain samples; an empty noise clip
            # can never be extended to the speech length.
            noisy_speech = None
            if len(speech) > 0 and len(noise) > 0:
                segment = _random_noise_segment(noise, len(speech))
                noisy_speech = _mix_at_snr(speech, segment, snr_db)

            if noisy_speech is None:
                print(
                    f"Warning: could not mix {noise_file} into {clean_file} "
                    "(empty or silent audio); saving speech unmodified."
                )
                noisy_speech = speech

            # Save augmented file
            output_file_path = os.path.join(output_keyword_path, f"noisy_{clean_file}")
            sf.write(output_file_path, noisy_speech, sr)

    print(f"✅ Augmentation complete. Noisy files saved in: {output_dir}")

# --- Example Usage ---
# Generates a noisy copy of the clean speech-commands dataset in a separate
# directory. The preprocessing steps from Notebook 1 can then be run on that
# new directory.
clean_data_dir = './datasets/speech-commands/speech_commands'
noise_data_dir = './datasets/ESC-50/audio'
augmented_output_dir = './datasets/speech-commands-augmented'

augment_with_noise(
    clean_audio_dir=clean_data_dir,
    noise_audio_dir=noise_data_dir,
    output_dir=augmented_output_dir,
    snr_db=15,
)