In [1]:
import os
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Gain
from pydub import AudioSegment
import numpy as np
import re
import array
import string
import random

In [2]:
# Folder that is scanned for the source .mp3 clips
audio_folder = "Dataset/Valid/Yes"
# Folder that receives the augmented copies
output_folder = "Dataset/Valid/Yes/augmented"

In [3]:
# Augmentation pipeline. Every transform carries its own probability `p`, so
# any subset of them may fire for a given clip.
augmentations = Compose([
    # Additive Gaussian noise with amplitude drawn from [0.001, 0.015]
    AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
    # Speed change between 0.8x and 1.2x
    TimeStretch(min_rate=0.8, max_rate=1.2, p=0.5),
    # Pitch change of up to 4 semitones in either direction
    PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
    # Volume change drawn from [-10, +10] dB. The range used to be pinned to
    # exactly +10 dB (min == max), which is not a random augmentation and
    # pushes most clips past full scale; a symmetric range matches the other
    # transforms in this pipeline.
    Gain(min_gain_in_db=-10, max_gain_in_db=10, p=0.5),
])

In [4]:
# Helper that builds a random lowercase string
def generate_random_string(length):
    """Return `length` characters, each picked at random from a-z.

    Uses the module-level `random` generator, so the result is reproducible
    after `random.seed(...)`. A `length` of 0 yields an empty string.
    """
    alphabet = string.ascii_lowercase
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)

# Collect the inputs first: a missing input folder then raises before anything
# is created on disk, and sorting makes the processing order stable.
mp3_filenames = sorted(
    name for name in os.listdir(audio_folder) if name.lower().endswith('.mp3')
)

# Create the output folder once instead of on every iteration
os.makedirs(output_folder, exist_ok=True)

# Largest positive 16-bit sample value; maps int16 <-> float in [-1.0, 1.0]
INT16_MAX = 32767.0

for filename in mp3_filenames:
    audio_path = os.path.join(audio_folder, filename)

    # Force 16-bit samples so the int16 scaling below is always valid
    audio = AudioSegment.from_mp3(audio_path).set_sample_width(2)

    # Decode to floating-point samples in roughly [-1.0, 1.0]
    wav_data = np.array(audio.get_array_of_samples(), dtype=np.float32) / INT16_MAX

    # pydub interleaves channels (L, R, L, R, ...). De-interleave into
    # (channels, samples) so time/pitch transforms see each channel as a
    # coherent signal rather than a mix of alternating samples.
    if audio.channels > 1:
        wav_data = np.ascontiguousarray(wav_data.reshape(-1, audio.channels).T)

    # Apply audio augmentation
    augmented_audio = augmentations(samples=wav_data, sample_rate=audio.frame_rate)

    # Re-interleave multi-channel audio into pydub's flat sample layout
    if audio.channels > 1:
        augmented_audio = augmented_audio.T.reshape(-1)

    # Convert back to 16-bit integers. Clip first: noise or gain can push
    # samples past full scale, and an unclipped int16 cast wraps around
    # (loud crackle) instead of saturating.
    augmented_audio = (np.clip(augmented_audio, -1.0, 1.0) * INT16_MAX).astype(np.int16)

    # Create the output audio segment
    augmented_segment = AudioSegment(
        augmented_audio.tobytes(),
        frame_rate=audio.frame_rate,
        sample_width=audio.sample_width,
        channels=audio.channels
    )

    # Random suffix so repeated runs do not overwrite earlier augmentations
    output_base_filename = os.path.splitext(filename)[0]
    random_string = generate_random_string(5)
    output_filename = f"augmented_{output_base_filename}_{random_string}.mp3"
    output_path = os.path.join(output_folder, output_filename)

    # Save the augmented audio as MP3; export() returns an open file handle,
    # so close it to flush the data and avoid leaking descriptors.
    augmented_segment.export(output_path, format="mp3").close()