In [None]:
import numpy as np
import librosa
import soundfile as sf
from pydub import AudioSegment
from pydub.effects import strip_silence
import random
import os
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [None]:
# HYPERPARAMETERS
# Clip length (seconds) and sampling rate (Hz) that every derived setting is based on.
duration_seconds, sample_rate = 4, 44100

# Shared settings dictionary. Only 'duration' is read by the cells shown in this
# part of the notebook; the remaining entries look like mel-spectrogram settings
# consumed elsewhere -- TODO confirm against the feature-extraction code.
hyper_params = dict(
    duration=sample_rate * duration_seconds,  # target clip length, in samples
    n_mels=128,
    hop_length=512,
    n_fft=2048,
    fmin=20,
    fmax=sample_rate // 2,  # half the sampling rate
)

In [None]:

def audio_preprocess(file_path):
  """Load an audio file and return it as a fixed-length, peak-normalised waveform.

  Processing steps, in order:
    1. Load the file, resampled to 44100 Hz.
    2. Scale so the largest absolute sample equals 1 (files come with very
       different amplitude ranges).
    3. Trim leading/trailing silence with ``librosa.effects.trim(top_db=60)``.
    4. Drop every remaining sample whose magnitude is <= 0.001.
    5. Crop to, or centre-pad with zeros up to, ``hyper_params['duration']`` samples.

  An empty or completely silent file yields an all-zero waveform instead of
  raising (empty input) or dividing by zero (silent input).

  Args:
    file_path: Path of the audio file; passed straight to ``librosa.load``.

  Returns:
    Tuple ``(waveform, sample_rate)`` where ``waveform`` has exactly
    ``hyper_params['duration']`` samples and ``sample_rate`` is the rate
    returned by ``librosa.load``.
    Assumes ``librosa.load`` returns a mono 1-D signal (its default) -- the
    boolean mask and the padding below are written for 1-D input.
  """
  target_len = hyper_params['duration']
  waveform, sample_rate = librosa.load(file_path, sr=44100)

  # Guard the normalisation: np.max() raises on an empty array and a peak of 0
  # would turn the whole signal into NaN.
  peak = np.max(np.abs(waveform)) if waveform.size else 0.0
  if peak > 0:
      # normalising the waveform since each audio file has the amplitude values in different ranges
      waveform = waveform / peak

      # cut leading/trailing parts that are more than 60 dB below the peak
      waveform, _ = librosa.effects.trim(waveform, top_db=60)

      # keep only samples above 0.001 (60 dB below the normalised peak of 1.0)
      # NOTE(review): this mask is applied per sample, so quiet samples in the
      # middle of the clip (e.g. around zero crossings) are removed as well,
      # not just silent stretches -- confirm this is intended.
      waveform = waveform[np.abs(waveform) > 0.001]
  else:
      # Nothing audible in the file: continue with an empty signal so the
      # padding step below produces pure silence.
      waveform = waveform[:0]

  # crop or zero-pad (centred) to the fixed target length
  if len(waveform) > target_len:
      waveform = waveform[:target_len]
  else:
      padding = target_len - len(waveform)
      offset = padding // 2
      waveform = np.pad(waveform, (offset, padding - offset), 'constant')

  return waveform, sample_rate


def apply_audio_augmentation(audio, sample_rate, augmentation_type):
    """Return an augmented version of an already-loaded audio signal.

    No file I/O happens here; the caller loads the audio and saves the result.

    Args:
        audio: Array of audio samples.
        sample_rate: Sampling rate of ``audio``; only used by "pitch_shift".
        augmentation_type: One of
            "time_shift"      - circularly shift the signal by 3000 samples,
            "speed_change"    - time-stretch by a random rate in [0.7, 1.3],
            "pitch_shift"     - shift pitch by a random amount in [-2, 2] steps,
            "noise_injection" - add Gaussian white noise scaled by 0.005.

    Returns:
        The augmented signal as an array.

    Raises:
        ValueError: If ``augmentation_type`` is not one of the names above
            (previously this surfaced as an UnboundLocalError at ``return``).
    """
    if augmentation_type == "time_shift":
        # np.roll wraps around: samples pushed past the end reappear at the start.
        augmented_audio = np.roll(audio, 3000)
    elif augmentation_type == "speed_change":
        rate = random.uniform(0.7, 1.3)
        augmented_audio = librosa.effects.time_stretch(audio, rate=rate)
    elif augmentation_type == "pitch_shift":
        augmented_audio = librosa.effects.pitch_shift(audio, sr = sample_rate, n_steps=random.uniform(-2, 2))
    elif augmentation_type == "noise_injection":
        # Add white noise
        noise_factor = 0.005
        white_noise = np.random.randn(len(audio)) * noise_factor
        augmented_audio = audio + white_noise
    else:
        raise ValueError(
            f"Unknown augmentation_type {augmentation_type!r}; expected one of "
            "'time_shift', 'speed_change', 'pitch_shift', 'noise_injection'"
        )

    return augmented_audio

# Trial use case: load any audio file, augment it, then save the result and listen to what changed, e.g.
# audio, sr = librosa.load('/content/car_horn_5.wav', sr=44100)
# sf.write('/content/untitled.wav', apply_audio_augmentation(audio, sr, "speed_change"), sr)

In [None]:
def augment_audio(input_path, output_path, max_samples=800, skip_files=("Laughter_284.flac",)):
    """
    Augments audio files in the input directory and saves them to the output directory.
    Only augments folders with less than max_samples files.

    ``input_path`` is expected to contain one sub-folder per class. For every
    class with fewer than ``max_samples`` files, the shortfall is spread as
    evenly as possible over its files: each file gets ``shortfall // n``
    augmentations and the first ``shortfall % n`` non-skipped files (in sorted
    name order) get one extra. Each augmentation type is picked at random and
    the result is written to ``output_path/<class>/aug_<i>_<stem>.wav``.

    Args:
        input_path: Directory holding the per-class sub-folders.
        output_path: Directory that receives the augmented files (created if missing).
        max_samples: Target number of files per class.
        skip_files: File names that are never loaded or augmented. They still
            count towards the class size, as in the original behaviour.
            NOTE(review): the default entry is presumably a file that cannot
            be decoded -- confirm.

    Returns:
        None. Progress is reported with ``print``.
    """
    skip_files = skip_files or ()

    # Ensure the output directory exists
    os.makedirs(output_path, exist_ok=True)

    # Loop through each class folder in the input directory
    for class_folder in os.listdir(input_path):
        class_path = os.path.join(input_path, class_folder)
        if not os.path.isdir(class_path):
            continue

        # Only regular files count; sorting makes the distribution of the
        # remainder reproducible across runs and file systems.
        file_names = sorted(
            name for name in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, name))
        )
        num_files = len(file_names)

        print(f"Augmenting files in folder: {class_folder}")

        # Nothing to do for empty classes (this used to raise ZeroDivisionError)
        # or for classes that already have enough samples.
        if num_files == 0 or num_files >= max_samples:
            continue

        # Split the shortfall into an equal share per file plus a remainder.
        additional_files_needed = max_samples - num_files
        augmentation_factor_per_file, remaining_augmentations = divmod(
            additional_files_needed, num_files
        )

        output_class_folder = os.path.join(output_path, class_folder)

        for file_name in file_names:
            if file_name in skip_files:
                continue

            # Determine the number of augmentations for this file
            augmentations_for_this_file = augmentation_factor_per_file
            if remaining_augmentations > 0:
                augmentations_for_this_file += 1
                remaining_augmentations -= 1

            # Avoid decoding audio that will not be augmented.
            if augmentations_for_this_file == 0:
                continue

            # Load the audio file
            audio_path = os.path.join(class_path, file_name)
            waveform, sample_rate = librosa.load(audio_path, sr=44100)

            os.makedirs(output_class_folder, exist_ok=True)
            stem = os.path.splitext(file_name)[0]

            for i in range(augmentations_for_this_file):
                # Apply a randomly chosen augmentation
                augmentation_type = random.choice(["time_shift", "speed_change", "pitch_shift", "noise_injection"])
                augmented_audio = apply_audio_augmentation(waveform, sample_rate, augmentation_type)

                # Save the augmented audio to the output directory
                output_audio_path = os.path.join(output_class_folder, f"aug_{i}_{stem}.wav")
                sf.write(output_audio_path, augmented_audio, sample_rate)
                print(f"Augmented file saved: {output_audio_path}")

In [None]:
# Training set on the mounted Google Drive (one sub-folder per class) and the
# Drive folder that receives the generated clips.
input_folder = "/content/drive/My Drive/audio_dataset/audio_dataset/train"
output_folder = "/content/drive/My Drive/DLproject-Numpy/augmented_audios_final"

# Top up every under-represented class using the function's default target size.
augment_audio(input_path=input_folder, output_path=output_folder)

Augmenting files in folder: Fart
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Fart/aug_0_Fart_1.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Fart/aug_1_Fart_1.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Fart/aug_0_Fart_10.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Fart/aug_1_Fart_10.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Fart/aug_0_Fart_100.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Fart/aug_1_Fart_100.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Fart/aug_0_Fart_101.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Fart/aug_1_Fart_101.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Fart/aug_0_Fart_

  waveform, sample_rate = librosa.load(audio_path, sr=44100)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Laughter/aug_0_Laughter_99.wav
Augmenting files in folder: Shatter
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Shatter/aug_0_Shatter_1.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Shatter/aug_1_Shatter_1.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Shatter/aug_2_Shatter_1.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Shatter/aug_0_Shatter_10.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Shatter/aug_1_Shatter_10.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Shatter/aug_2_Shatter_10.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Shatter/aug_0_Shatter_100.wav
Augmented file saved: /content/drive/My Drive/DLproj

  waveform, sample_rate = librosa.load(audio_path, sr=44100)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Snare_drum/aug_0_Snare_drum_133.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Snare_drum/aug_0_Snare_drum_134.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Snare_drum/aug_0_Snare_drum_135.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Snare_drum/aug_0_Snare_drum_136.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Snare_drum/aug_0_Snare_drum_137.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Snare_drum/aug_0_Snare_drum_138.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Snare_drum/aug_0_Snare_drum_139.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Snare_drum/aug_0_Snare_drum_14.wav
Augmented file saved: /content/dr

  waveform, sample_rate = librosa.load(audio_path, sr=44100)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Splash_and_splatter/aug_0_Splash_and_splatter_103.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Splash_and_splatter/aug_1_Splash_and_splatter_103.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Splash_and_splatter/aug_2_Splash_and_splatter_103.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Splash_and_splatter/aug_3_Splash_and_splatter_103.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Splash_and_splatter/aug_0_Splash_and_splatter_104.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Splash_and_splatter/aug_1_Splash_and_splatter_104.wav
Augmented file saved: /content/drive/My Drive/DLproject-Numpy/augmented_audios_final/Splash_and_splatter/aug_2_Splash_and_splatter_104.wav
Augmented file saved: /cont