In [2]:
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [11]:
import numpy as np
import librosa
import soundfile as sf
from pydub import AudioSegment
from pydub.effects import strip_silence

def apply_audio_augmentation(input_path, output_path, augmentation_type):
    """Apply one augmentation to an audio file and save the result as 16-bit PCM.

    Args:
        input_path: Path of the audio file to read. It is loaded with
            ``librosa.load(..., sr=None)``, i.e. at the file's own sample rate.
        output_path: Path the augmented audio is written to with ``sf.write``.
        augmentation_type: One of ``"noise_injection"``, ``"shifting_time"``,
            ``"changing_speed"`` or ``"changing_pitch"``.

    Raises:
        ValueError: If ``augmentation_type`` is not one of the names above.
    """
    # Load the audio file
    audio, sample_rate = librosa.load(input_path, sr=None)

    if augmentation_type == "noise_injection":
        # Add white (Gaussian) noise scaled by a small factor
        noise_factor = 0.005
        white_noise = np.random.randn(len(audio)) * noise_factor
        augmented_audio = audio + white_noise
    elif augmentation_type == "shifting_time":
        # Circular shift: samples pushed past the end wrap around to the start
        shift_factor = 0.1  # Fraction of the total length to shift
        shift_samples = int(len(audio) * shift_factor)
        augmented_audio = np.roll(audio, shift_samples)
    elif augmentation_type == "changing_speed":
        # rate < 1 slows the audio down, so the output is longer than the input
        speed_factor = 0.9  # Play at 90% of the original speed
        augmented_audio = librosa.effects.time_stretch(audio, rate=speed_factor)
    elif augmentation_type == "changing_pitch":
        # n_steps is measured in semitones, not percent
        pitch_steps = 1.2  # Raise the pitch by 1.2 semitones
        augmented_audio = librosa.effects.pitch_shift(audio, sr=sample_rate, n_steps=pitch_steps)
    else:
        raise ValueError("Invalid augmentation type. Choose from: noise_injection, shifting_time, changing_speed, changing_pitch")

    # Convert to 16-bit PCM. Clip first: an augmentation (noise injection in
    # particular) can push samples outside [-1, 1], and an out-of-range float
    # would wrap around when cast to int16, producing loud clicks.
    int16_max = np.iinfo(np.int16).max
    augmented_audio_16bit = (np.clip(augmented_audio, -1.0, 1.0) * int16_max).astype(np.int16)

    # Save the augmented audio
    sf.write(output_path, augmented_audio_16bit, sample_rate)

# Trial use case: upload any audio file, run the call below, then download untitled.wav to hear what has changed.
#apply_audio_augmentation('/content/car_horn_5.wav','/content/untitled.wav',"changing_speed")

In [16]:
import os
import random

# Define the function to apply audio augmentation
def apply_audio_augmentation(input_path, output_path, augmentation_type):
    """Apply one augmentation to an audio file and save the result as 16-bit PCM.

    Note: this cell redefines the function of the same name from an earlier
    cell; whichever definition ran last is the one in effect.

    Args:
        input_path: Path of the audio file to read. It is loaded with
            ``librosa.load(..., sr=None)``, i.e. at the file's own sample rate.
        output_path: Path the augmented audio is written to with ``sf.write``.
        augmentation_type: One of ``"noise_injection"``, ``"shifting_time"``,
            ``"changing_speed"`` or ``"changing_pitch"``.

    Raises:
        ValueError: If ``augmentation_type`` is not one of the names above.
    """
    # Load the audio file
    audio, sample_rate = librosa.load(input_path, sr=None)

    if augmentation_type == "noise_injection":
        # Add white (Gaussian) noise scaled by a small factor
        noise_factor = 0.005
        white_noise = np.random.randn(len(audio)) * noise_factor
        augmented_audio = audio + white_noise
    elif augmentation_type == "shifting_time":
        # Circular shift: samples pushed past the end wrap around to the start
        shift_factor = 0.1  # Fraction of the total length to shift
        shift_samples = int(len(audio) * shift_factor)
        augmented_audio = np.roll(audio, shift_samples)
    elif augmentation_type == "changing_speed":
        # rate < 1 slows the audio down, so the output is longer than the input
        speed_factor = 0.9  # Play at 90% of the original speed
        augmented_audio = librosa.effects.time_stretch(audio, rate=speed_factor)
    elif augmentation_type == "changing_pitch":
        # n_steps is measured in semitones, not percent
        pitch_steps = 1.2  # Raise the pitch by 1.2 semitones
        augmented_audio = librosa.effects.pitch_shift(audio, sr=sample_rate, n_steps=pitch_steps)
    else:
        raise ValueError("Invalid augmentation type. Choose from: noise_injection, shifting_time, changing_speed, changing_pitch")

    # Convert to 16-bit PCM. Clip first: an augmentation (noise injection in
    # particular) can push samples outside [-1, 1], and an out-of-range float
    # would wrap around when cast to int16, producing loud clicks.
    int16_max = np.iinfo(np.int16).max
    augmented_audio_16bit = (np.clip(augmented_audio, -1.0, 1.0) * int16_max).astype(np.int16)

    # Save the augmented audio
    sf.write(output_path, augmented_audio_16bit, sample_rate)


# Define the function to process the audio dataset
def _pick_source_files(wav_files, needed):
    """Return ``needed`` ``(round_index, file_name)`` pairs drawn from ``wav_files``.

    Files are drawn without replacement within a round; a new round starts once
    every file has been used, so a file name appears at most once per round.
    """
    picks = []
    round_index = 0
    while len(picks) < needed:
        take = min(len(wav_files), needed - len(picks))
        picks.extend((round_index, name) for name in random.sample(wav_files, take))
        round_index += 1
    return picks


def process_audio_dataset(input_directory, output_directory, target_count=640):
    """Top up under-populated folders with randomly augmented copies.

    Walks ``input_directory``. For every sub-folder that holds fewer than
    ``target_count`` ``.wav`` files, ``target_count - file_count`` augmented
    files are written to
    ``<output_directory>/<path relative to input_directory>/augmented_<folder>``.
    Passing the same path for both directories writes the ``augmented_*``
    folders next to the originals.

    Args:
        input_directory: Root directory to walk.
        output_directory: Root directory under which ``augmented_*`` folders
            are created.
        target_count: Desired number of ``.wav`` files per folder
            (defaults to 640).
    """
    augmentation_types = ["noise_injection", "shifting_time", "changing_speed", "changing_pitch"]

    for root, dirs, _files in os.walk(input_directory):
        # Prune generated folders in place so that a re-run (or an output tree
        # nested inside the input tree) never augments already-augmented data.
        dirs[:] = [name for name in dirs if not name.startswith("augmented_")]
        relative_root = os.path.relpath(root, input_directory)

        for dir_name in dirs:
            folder_path = os.path.join(root, dir_name)
            # Only .wav files are candidates; sorting makes a seeded run repeatable.
            wav_files = sorted(name for name in os.listdir(folder_path) if name.endswith(".wav"))
            missing = target_count - len(wav_files)
            if not wav_files or missing <= 0:
                # Nothing to draw from, or the folder is already large enough.
                continue

            # Create a new folder for augmented data
            augmented_folder_path = os.path.normpath(
                os.path.join(output_directory, relative_root, f"augmented_{dir_name}")
            )
            os.makedirs(augmented_folder_path, exist_ok=True)

            # More augmentations than source files may be needed, so files are
            # reused in rounds; the round number keeps output names unique.
            for round_index, file_name in _pick_source_files(wav_files, missing):
                input_file_path = os.path.join(folder_path, file_name)
                if round_index == 0:
                    output_file_name = f"aug_{file_name}"
                else:
                    output_file_name = f"aug{round_index}_{file_name}"
                output_file_path = os.path.join(augmented_folder_path, output_file_name)
                # Randomly choose augmentation type
                augmentation_type = random.choice(augmentation_types)
                apply_audio_augmentation(input_file_path, output_file_path, augmentation_type)


# Dataset splits (paths under the Google Drive mount point /content/drive)
input_directory_train = '/content/drive/My Drive/DLproject/audio dataset/audio_dataset/train'
input_directory_val = '/content/drive/My Drive/DLproject/audio dataset/audio_dataset/val'

# Process the train split first, then the val split; each split directory is
# passed as both the input and the output directory.
for split_directory in (input_directory_train, input_directory_val):
    process_audio_dataset(split_directory, split_directory)


In [28]:
# Mount Google Drive at /content/drive (Colab only); the dataset paths used in
# this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [43]:
import os
import random
import librosa
import numpy as np
import soundfile as sf

def augment_audio_folder(input_folder, output_folder, verbose=True):
    """
    Write one randomly augmented copy of every WAV file under the input directory.

    The directory layout of ``input_folder`` is mirrored under ``output_folder``
    and each output file is named ``aug_<original name>``. For every file one
    augmentation is picked at random: a fixed circular time shift, a speed
    change, or a pitch shift.

    Args:
        input_folder: Root directory that is walked recursively.
        output_folder: Root directory the augmented files are written to;
            created if missing.
        verbose: When True (the default), print a line per folder and per file.
    """
    # Ensure the output directory exists
    os.makedirs(output_folder, exist_ok=True)
    output_root = os.path.abspath(output_folder)

    # Loop through each folder and subfolder in the input directory
    for root, dirs, files in os.walk(input_folder):
        # If the output tree lives inside the input tree, do not walk into it.
        dirs[:] = [
            name for name in dirs
            if os.path.abspath(os.path.join(root, name)) != output_root
        ]

        # Mirror the current folder's position relative to the input root
        relative_path = os.path.relpath(root, input_folder)
        output_subfolder = os.path.join(output_folder, relative_path)
        os.makedirs(output_subfolder, exist_ok=True)

        if verbose:
            print("Augmenting files in folder:", relative_path)

        for file_name in files:
            # Only WAV files are augmented
            if not file_name.endswith(".wav"):
                continue

            audio_path = os.path.join(root, file_name)
            y, sr = librosa.load(audio_path, sr=None)

            # Choose a random augmentation type
            augmentation_type = random.choice(["time_shift", "speed_change", "pitch_shift"])

            if augmentation_type == "time_shift":
                # Circular shift by a fixed 3000 samples
                augmented_audio = np.roll(y, 3000)
            elif augmentation_type == "speed_change":
                rate = random.uniform(0.7, 1.3)
                augmented_audio = librosa.effects.time_stretch(y, rate=rate)
            elif augmentation_type == "pitch_shift":
                # Number of semitones to shift the pitch; draw it once and use
                # that value instead of discarding it and drawing again.
                n_steps = random.uniform(-2, 2)
                augmented_audio = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)

            # Save the augmented audio to the output directory
            output_file_name = "aug_" + file_name
            output_audio_path = os.path.join(output_subfolder, output_file_name)
            sf.write(output_audio_path, augmented_audio, sr)

            if verbose:
                print(f"Augmented file saved: {output_audio_path}")

# Example usage: read WAV files from the train split and write the augmented
# copies to a separate "train_augmented" directory.
input_folder = "/content/drive/My Drive/DLproject/audio dataset/audio_dataset/train"
output_folder = "/content/drive/My Drive/DLproject/audio dataset/audio_dataset/train_augmented"

augment_audio_folder(input_folder=input_folder, output_folder=output_folder)


Augmenting files in folder: .
Augmenting files in folder: car_horn
Augmented file saved: /content/drive/My Drive/DLproject/audio dataset/audio_dataset/train_augmented/car_horn/aug_car_horn_110.wav
Augmented file saved: /content/drive/My Drive/DLproject/audio dataset/audio_dataset/train_augmented/car_horn/aug_car_horn_122.wav
Augmented file saved: /content/drive/My Drive/DLproject/audio dataset/audio_dataset/train_augmented/car_horn/aug_car_horn_106.wav
Augmented file saved: /content/drive/My Drive/DLproject/audio dataset/audio_dataset/train_augmented/car_horn/aug_car_horn_102.wav
Augmented file saved: /content/drive/My Drive/DLproject/audio dataset/audio_dataset/train_augmented/car_horn/aug_car_horn_115.wav
Augmented file saved: /content/drive/My Drive/DLproject/audio dataset/audio_dataset/train_augmented/car_horn/aug_car_horn_127.wav
Augmented file saved: /content/drive/My Drive/DLproject/audio dataset/audio_dataset/train_augmented/car_horn/aug_car_horn_1.wav
Augmented file saved: /co

  y, sr = librosa.load(audio_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Augmented file saved: /content/drive/My Drive/DLproject/audio dataset/audio_dataset/train_augmented/Laughter/aug_Laughter_99.wav
Augmenting files in folder: Knock
Augmented file saved: /content/drive/My Drive/DLproject/audio dataset/audio_dataset/train_augmented/Knock/aug_Knock_106.wav
Augmented file saved: /content/drive/My Drive/DLproject/audio dataset/audio_dataset/train_augmented/Knock/aug_Knock_107.wav
Augmented file saved: /content/drive/My Drive/DLproject/audio dataset/audio_dataset/train_augmented/Knock/aug_Knock_103.wav
Augmented file saved: /content/drive/My Drive/DLproject/audio dataset/audio_dataset/train_augmented/Knock/aug_Knock_100.wav
Augmented file saved: /content/drive/My Drive/DLproject/audio dataset/audio_dataset/train_augmented/Knock/aug_Knock_10.wav
Augmented file saved: /content/drive/My Drive/DLproject/audio dataset/audio_dataset/train_augmented/Knock/aug_Knock_104.wav
Augmented file saved: /content/drive/My Drive/DLproject/audio dataset/audio_dataset/train_augm