## Notebook 3: Generate Noisy Dataset (Final, Robust Version)

This notebook downloads the ESC-50 dataset and uses it to create a `noisy_audio` directory by augmenting the files in the `clean_audio` directory.

**Features:**
- Downloads the dataset as a ZIP file to avoid `git` network errors.
- Resolves every folder from the project root (the parent of the notebook's working directory), so start the notebook from its own folder.
- Displays progress bars for download and augmentation.

In [2]:
import os
import shutil
import requests
import zipfile
import librosa
import numpy as np
import soundfile as sf
import random
from tqdm.notebook import tqdm

# --- 1. Setup & Path Definitions ---
# Remote location of the ESC-50 archive.
zip_url = 'https://github.com/karolpiczak/ESC-50/archive/master.zip'

# The project root is the directory one level above the current working directory.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# All dataset files live under <project_root>/datasets; create it up front.
datasets_dir = os.path.join(project_root, 'datasets')
os.makedirs(datasets_dir, exist_ok=True)

# Where the archive is saved, where it is unpacked, and where the unpacked
# audio clips are expected to end up.
zip_path = os.path.join(datasets_dir, 'ESC-50.zip')
extract_path = os.path.join(datasets_dir, 'ESC-50-raw')
final_noise_dir = os.path.join(extract_path, 'ESC-50-master', 'audio')

print(f"Project Root: {project_root}")
print(f"Dataset will be downloaded to: {zip_path}")

Project Root: /Users/zilongzeng/Research/Drone
Dataset will be downloaded to: /Users/zilongzeng/Research/Drone/datasets/ESC-50.zip


In [3]:
# --- 2. Download and Extract ZIP ---
# Set to True to throw away an existing extraction and fetch a fresh copy.
force_download = False

# The archive is unpacked here first and only moved to extract_path once
# extraction has succeeded, so a failed run never destroys a usable dataset.
staging_path = extract_path + '.partial'

if os.path.isdir(final_noise_dir) and not force_download:
    # Re-running the notebook should not re-download the whole archive.
    print(f'ESC-50 already present at: {os.path.abspath(extract_path)} '
          '(set force_download = True to re-download)')
else:
    print('Downloading ESC-50.zip from GitHub...')

    # Clear leftovers from an interrupted earlier run.
    shutil.rmtree(staging_path, ignore_errors=True)
    if os.path.exists(zip_path):
        os.remove(zip_path)

    try:
        # timeout=(connect, read) in seconds keeps a stalled connection from
        # hanging the cell; the context manager releases the streamed
        # connection even when an error interrupts the loop.
        with requests.get(zip_url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()  # Raise an exception for bad status codes
            total_size = int(response.headers.get('content-length', 0))

            with open(zip_path, 'wb') as f, tqdm(
                desc='Downloading ESC-50',
                total=total_size or None,  # no content-length -> size unknown
                unit='iB',
                unit_scale=True,
                unit_divisor=1024,
            ) as bar:
                # 1 MiB chunks: far fewer Python-level iterations than 1 KiB.
                for data in response.iter_content(chunk_size=1024 * 1024):
                    bar.update(f.write(data))

        print(f'\nExtracting {zip_path}...')
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(staging_path)

        # Only now replace whatever was extracted before.
        if os.path.exists(extract_path):
            shutil.rmtree(extract_path)
        os.replace(staging_path, extract_path)
        print(f'\n✅ Noise dataset successfully extracted to: {os.path.abspath(extract_path)}')
    except requests.exceptions.RequestException as e:
        print(f"Download failed: {e}")
    except zipfile.BadZipFile as e:
        print(f"Extraction failed, the downloaded file is not a valid ZIP archive: {e}")
    finally:
        # Never leave the archive or a half-extracted tree behind.
        if os.path.exists(zip_path):
            os.remove(zip_path)
        shutil.rmtree(staging_path, ignore_errors=True)

Downloading ESC-50.zip from GitHub...


Downloading ESC-50: 0.00iB [00:00, ?iB/s]


Extracting /Users/zilongzeng/Research/Drone/datasets/ESC-50.zip...

✅ Noise dataset successfully extracted to: /Users/zilongzeng/Research/Drone/datasets/ESC-50-raw


In [4]:
# --- 3. Data Augmentation Function ---
def augment_with_noise(clean_audio_dir, noise_audio_dir, output_dir, snr_db=15, files_per_category=None, seed=None):
    """Write a noisy copy of every clean ``.wav`` file, one folder per command.

    Each sub-folder of ``clean_audio_dir`` is mirrored under ``output_dir``.
    Every ``.wav`` file in it is loaded at 16 kHz, mixed with a random excerpt
    of a randomly chosen ``.wav`` file from ``noise_audio_dir`` that has been
    scaled to the requested signal-to-noise ratio, and written under the same
    file name.

    Args:
        clean_audio_dir: Directory whose sub-folders hold the clean ``.wav`` files.
        noise_audio_dir: Directory holding the ``.wav`` noise clips.
        output_dir: Destination directory. Any existing content is deleted,
            but only after both input directories have been validated.
        snr_db: Target signal-to-noise ratio of the mix, in dB.
        files_per_category: If set, at most this many randomly sampled files
            are processed per sub-folder; ``None`` processes all of them.
        seed: Optional seed for a private random generator, making file and
            excerpt selection repeatable. ``None`` (the default) draws from the
            shared ``random`` module state.

    Returns:
        None. Unusable input directories are reported with a printed message
        and the function returns without modifying ``output_dir``.
    """
    # Validate every input BEFORE wiping output_dir, so a wrong path cannot
    # destroy the result of an earlier successful run.
    if not os.path.isdir(clean_audio_dir):
        print(f"Error: Clean audio directory not found: {clean_audio_dir}")
        return

    if not os.path.isdir(noise_audio_dir):
        print(f"Error: Noise audio directory not found: {noise_audio_dir}")
        return

    # Sorted because os.listdir order is arbitrary; a fixed order is needed
    # for a seed to select the same files on every run.
    noise_files = sorted(
        os.path.join(noise_audio_dir, f) for f in os.listdir(noise_audio_dir) if f.endswith('.wav')
    )
    if not noise_files:
        print(f"Error: No noise files found in {noise_audio_dir}")
        return

    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    # The `random` module and random.Random instances expose the same
    # sample/choice/randint API, so either can serve as the generator.
    rng = random if seed is None else random.Random(seed)

    print(f'Starting augmentation with SNR = {snr_db} dB...')

    command_folders = sorted(
        d for d in os.listdir(clean_audio_dir) if os.path.isdir(os.path.join(clean_audio_dir, d))
    )
    for command in tqdm(command_folders, desc='Processing command folders'):
        source_command_path = os.path.join(clean_audio_dir, command)
        output_command_path = os.path.join(output_dir, command)
        os.makedirs(output_command_path, exist_ok=True)

        files_to_process = sorted(f for f in os.listdir(source_command_path) if f.endswith('.wav'))
        if files_per_category and len(files_to_process) > files_per_category:
            files_to_process = rng.sample(files_to_process, files_per_category)

        for clean_file in files_to_process:
            clean_path = os.path.join(source_command_path, clean_file)
            output_file_path = os.path.join(output_command_path, clean_file)
            speech, sr = librosa.load(clean_path, sr=16000)

            if len(speech) == 0:
                # Nothing to mix into; pass the empty clip through rather than
                # computing the mean power of an empty array (NaN + warning).
                sf.write(output_file_path, speech, sr)
                continue

            noise_segment = _draw_noise_segment(noise_files, len(speech), sr, rng)
            noisy_speech = _mix_at_snr(speech, noise_segment, snr_db)

            # NOTE(review): assumes sf.write stores .wav as integer PCM, where
            # samples outside [-1, 1] are clipped. Rescaling the whole mix
            # avoids that distortion and leaves the SNR unchanged.
            peak = np.max(np.abs(noisy_speech))
            if peak > 1.0:
                noisy_speech = noisy_speech / peak

            sf.write(output_file_path, noisy_speech, sr)

    print(f"\n✅ Augmentation complete. Noisy files saved in: {os.path.abspath(output_dir)}")


def _draw_noise_segment(noise_files, length, sr, rng, max_draws=5):
    """Return ``length`` samples cut at a random offset from a random noise file.

    Clips shorter than ``length`` are repeated to fill the gap. Empty clips and
    excerpts whose mean power is below 1e-10 (effectively silent) are redrawn,
    up to ``max_draws`` times; if every draw is unusable, the last excerpt
    (or zeros when no clip could be read) is returned as-is.
    """
    segment = np.zeros(length, dtype=np.float32)
    for _attempt in range(max_draws):
        noise, _ = librosa.load(rng.choice(noise_files), sr=sr)
        if len(noise) == 0:
            # np.pad(..., 'wrap') cannot extend an empty array.
            continue
        if len(noise) < length:
            noise = np.pad(noise, (0, length - len(noise)), 'wrap')

        start = rng.randint(0, len(noise) - length)
        segment = noise[start:start + length]

        # Below this power the epsilon in _mix_at_snr dominates and the
        # requested SNR can no longer be reached, so try another excerpt.
        if np.mean(segment**2) >= 1e-10:
            break
    return segment


def _mix_at_snr(speech, noise_segment, snr_db):
    """Return ``speech`` plus ``noise_segment`` scaled to the ``snr_db`` target."""
    speech_power = np.mean(speech**2)
    noise_power = np.mean(noise_segment**2)
    snr = 10**(snr_db / 10)  # dB -> linear power ratio
    # The small epsilon keeps the division finite for a silent noise excerpt.
    scale = np.sqrt(speech_power / (snr * noise_power + 1e-10))
    return speech + noise_segment * scale

# --- Run Data Augmentation ---
# Clean inputs and noisy outputs sit side by side in the training-data folder.
training_data_dir = os.path.join(project_root, 'datasets', 'drone_data_for_training')
clean_data_dir = os.path.join(training_data_dir, 'clean_audio')
noisy_output_dir = os.path.join(training_data_dir, 'noisy_audio')

# Without the extracted noise clips there is nothing to mix in.
if not os.path.exists(final_noise_dir):
    print(f"Error: Could not find the final noise directory at {final_noise_dir}")
else:
    augment_with_noise(clean_data_dir, final_noise_dir, noisy_output_dir)

Error: Clean audio directory not found: /Users/zilongzeng/Research/Drone/datasets/drone_data_for_training/clean_audio
