In [14]:
from __future__ import annotations

import sys
from pathlib import Path
import numpy as np
import soundfile as sf
from tqdm import tqdm

from audiomentations import Compose, AddGaussianSNR, PitchShift

# The project root is taken to be two directory levels above the current
# working directory, so this cell depends on where the kernel was started.
PROJECT_ROOT = Path.cwd().parents[1]

# Make project-local packages importable. The membership check keeps the cell
# idempotent: re-running it must not stack duplicate entries onto sys.path.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

# Source recordings are read from IN_DIR; augmented copies are written under
# OUT_DIR, which is created up front if it does not exist yet.
IN_DIR = PROJECT_ROOT / "data/wavs"
OUT_DIR = PROJECT_ROOT / "data/augmented_wavs"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Echo the resolved locations so a wrong working directory is spotted early.
print("PROJECT_ROOT:", PROJECT_ROOT)
print("IN_DIR:", IN_DIR)
print("OUT_DIR:", OUT_DIR)


PROJECT_ROOT: /home/SpeakerRec/BioVoice
IN_DIR: /home/SpeakerRec/BioVoice/data/wavs
OUT_DIR: /home/SpeakerRec/BioVoice/data/augmented_wavs


In [15]:
# Parameter ranges shared by all augmentation profiles
SNR_MIN_DB = 5.0      # lower bound passed to AddGaussianSNR(min_snr_db=...)
SNR_MAX_DB = 25.0     # upper bound passed to AddGaussianSNR(max_snr_db=...)
PITCH_MIN_ST = -2.0   # lower bound passed to PitchShift(min_semitones=...)
PITCH_MAX_ST = 2.0    # upper bound passed to PitchShift(max_semitones=...)


def _gaussian_snr() -> AddGaussianSNR:
    """Return a new Gaussian-noise transform over the configured SNR range (p=1.0)."""
    return AddGaussianSNR(min_snr_db=SNR_MIN_DB, max_snr_db=SNR_MAX_DB, p=1.0)


def _pitch_shift() -> PitchShift:
    """Return a new pitch-shift transform over the configured semitone range (p=1.0)."""
    return PitchShift(min_semitones=PITCH_MIN_ST, max_semitones=PITCH_MAX_ST, p=1.0)


# Three profiles: noise only, pitch only, and noise followed by pitch.
# Each pipeline is given its own transform instances; none are shared.
aug_snr = Compose([_gaussian_snr()])
aug_pitch = Compose([_pitch_shift()])
aug_snr_pitch = Compose([_gaussian_snr(), _pitch_shift()])


In [16]:
# %%
def apply_aug(audio: np.ndarray, sr: int, aug: Compose) -> np.ndarray:
    """
    Run an augmentation pipeline over audio laid out the way soundfile returns it.

    soundfile returns:
      mono: (N,)
      multi: (N, C)

    Multichannel audio is handed to the pipeline in ONE call, transposed to
    channels-first (C, N), and transposed back afterwards. Calling the
    pipeline once per channel would re-randomise its parameters on every
    call, so the channels of a single file could end up with different
    settings from one another.

    Args:
        audio: Samples shaped (N,) or (N, C). Converted to float32 first.
        sr: Sample rate, forwarded to the pipeline as ``sample_rate``.
        aug: Pipeline invoked as ``aug(samples=..., sample_rate=...)``.
            NOTE(review): for multichannel input every transform inside it
            must accept channels-first 2-D arrays - confirm for any new
            transform added to a profile.

    Returns:
        A float32 array in the input's layout: (N,) for mono, or (N', C)
        for multichannel, where N' is whatever length the pipeline returns.
    """
    audio = audio.astype(np.float32, copy=False)

    if audio.ndim == 1:
        return aug(samples=audio, sample_rate=sr).astype(np.float32)

    # (N, C) -> (C, N); made contiguous because the transpose is only a view.
    channels_first = np.ascontiguousarray(audio.T)
    augmented = aug(samples=channels_first, sample_rate=sr)

    # Back to soundfile's (N, C) layout, as a contiguous float32 array.
    return np.ascontiguousarray(augmented.T, dtype=np.float32)


def safe_peak_normalize(audio: np.ndarray, peak: float = 0.99) -> np.ndarray:
    """
    Scale audio down so its largest absolute sample equals ``peak``.

    Only attenuates: audio whose absolute maximum is already at or below
    ``peak`` (including empty and all-zero input) is returned unchanged,
    as the very same object.

    Args:
        audio: Sample array of any shape.
        peak: Target ceiling for the absolute sample value.

    Returns:
        The input itself when no scaling is needed, otherwise a new
        rescaled array.
    """
    if not audio.size:
        return audio

    loudest = float(np.abs(audio).max())

    # Written as a positive test so a NaN maximum falls through untouched.
    needs_scaling = loudest > 0 and loudest > peak
    if not needs_scaling:
        return audio

    return (audio / loudest) * peak


In [17]:
# %%
# Check the input directory BEFORE scanning it, so a wrong path fails with a
# clear message instead of after an empty scan.
assert IN_DIR.exists(), f"IN_DIR does not exist: {IN_DIR}"
wav_paths = sorted(IN_DIR.rglob("*.wav"))
print(f"Found {len(wav_paths)} wav files")

# (filename tag, pipeline) pairs: every input file is written once per profile.
profiles = [
    ("aug-SNR", aug_snr),
    ("aug-pitch", aug_pitch),
    ("aug-SNR-pitch", aug_snr_pitch),
]

for wav_path in tqdm(wav_paths, desc="Augmenting"):
    # Open via SoundFile so the source's subtype is available alongside the
    # samples and sample rate.
    with sf.SoundFile(wav_path) as src:
        audio = src.read(always_2d=False)
        sr = src.samplerate
        subtype = src.subtype

    # Keep folder structure
    rel = wav_path.relative_to(IN_DIR)
    out_subdir = OUT_DIR / rel.parent
    out_subdir.mkdir(parents=True, exist_ok=True)

    for tag, aug in profiles:
        aug_audio = apply_aug(audio, sr, aug)
        # Augmentation can push samples past full scale; rescale only then.
        aug_audio = safe_peak_normalize(aug_audio, peak=0.99)

        # <original stem>_<profile tag><original suffix>
        out_name = f"{wav_path.stem}_{tag}{wav_path.suffix}"
        out_path = out_subdir / out_name
        # Pass the source subtype explicitly; without it sf.write falls back
        # to the container's default subtype regardless of the input.
        sf.write(out_path, aug_audio, sr, subtype=subtype)

print("Done.")


Found 90 wav files


Augmenting: 100%|██████████| 90/90 [00:04<00:00, 21.01it/s]

Done.





In [18]:
# %%
# Collect every wav now present under OUT_DIR (recursively) and report the total.
outs = sorted(OUT_DIR.rglob("*.wav"))
print(f"Saved {len(outs)} augmented wavs into: {OUT_DIR}")

# Preview at most the first 20 files, shown relative to OUT_DIR.
# Guarded so that an empty result prints nothing extra.
if outs:
    print("\n".join(str(p.relative_to(OUT_DIR)) for p in outs[:20]))


Saved 270 augmented wavs into: /home/SpeakerRec/BioVoice/data/augmented_wavs
eden_001_aug-SNR-pitch.wav
eden_001_aug-SNR.wav
eden_001_aug-pitch.wav
eden_002_aug-SNR-pitch.wav
eden_002_aug-SNR.wav
eden_002_aug-pitch.wav
eden_003_aug-SNR-pitch.wav
eden_003_aug-SNR.wav
eden_003_aug-pitch.wav
eden_004_aug-SNR-pitch.wav
eden_004_aug-SNR.wav
eden_004_aug-pitch.wav
eden_005_aug-SNR-pitch.wav
eden_005_aug-SNR.wav
eden_005_aug-pitch.wav
eden_006_aug-SNR-pitch.wav
eden_006_aug-SNR.wav
eden_006_aug-pitch.wav
eden_007_aug-SNR-pitch.wav
eden_007_aug-SNR.wav
