In [3]:
import os
import torch
import torchvision.transforms as transforms
import torchaudio.transforms as T
import torchvision.io as io
import torchaudio
import numpy as np
from google.colab import drive
import matplotlib.pyplot as plt
import numpy as np
import librosa
import soundfile as sf
from pydub import AudioSegment
from pydub.effects import strip_silence
# Mount Google Drive at /content/drive (Colab only); the data paths used by
# later cells all live under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [4]:

# Define augmentation functions
def time_stretch(audio, factor):
    """Stretch a spectrogram in time by a fixed rate without changing pitch.

    Args:
        audio: Tensor handed to ``torchaudio.transforms.TimeStretch``.
            NOTE(review): despite the parameter name, TimeStretch operates on
            a complex-valued spectrogram of shape (..., freq, time), not on a
            raw waveform -- confirm what callers pass in.
        factor: Speed-up rate (> 1 shortens the signal, < 1 lengthens it).

    Returns:
        The time-stretched tensor produced by the transform.
    """
    # The keyword is ``fixed_rate``; the previous ``n_stps`` is not accepted
    # by TimeStretch and raised TypeError on every call.
    # ``n_freq`` is taken from the input so spectrograms that were not built
    # with the default n_fft=400 (201 bins) are accepted as well.
    # NOTE(review): hop_length is left at the transform's default, which is
    # derived from n_freq -- verify it matches the STFT that produced the input.
    stretch = torchaudio.transforms.TimeStretch(
        n_freq=audio.size(-2),
        fixed_rate=factor,
    )
    return stretch(audio)

def pitch_shift(audio, shift, sample_rate=None):
    """Shift the pitch of a waveform by ``shift`` steps.

    Args:
        audio: Waveform tensor with time as the last dimension.
        shift: Number of steps to shift, forwarded as ``n_steps``.
        sample_rate: Sampling rate of ``audio`` in Hz. Callers should pass the
            real rate. When omitted, the length of the last dimension is used,
            which reproduces what this helper did before the parameter
            existed (it passed the sample count as the sample rate).

    Returns:
        The pitch-shifted waveform returned by
        ``torchaudio.transforms.PitchShift``.
    """
    if sample_rate is None:
        # Legacy fallback: the number of samples is NOT a sampling rate.
        # NOTE(review): kept only so existing two-argument calls keep working;
        # supply the true rate wherever it is known.
        sample_rate = audio.size(-1)
    shifter = torchaudio.transforms.PitchShift(sample_rate=int(sample_rate), n_steps=shift)
    return shifter(audio)

def noise_injection(audio, noise_level):
    """Return ``audio`` with additive standard-normal noise scaled by ``noise_level``.

    The noise tensor has the same shape, dtype and device as ``audio``
    (``torch.randn_like``); the input tensor itself is not modified.
    """
    return audio + noise_level * torch.randn_like(audio)

def frequency_masking(spectrogram, num_masks=2, mask_factor=27):
    """Zero out random bands along dimension 1 of a spectrogram.

    For every mask a width is drawn uniformly from ``[0, mask_factor]`` and a
    start position uniformly from all positions where the band still fits, so
    masks can land anywhere along the axis. (Previously the start was drawn
    from ``[0, mask_factor)`` and the width was fixed, so only the first
    ``2 * mask_factor`` bins could ever be masked.)

    Args:
        spectrogram: Tensor with at least two dimensions. Masking is applied
            to dimension 1.
            NOTE(review): for a (channel, freq, time) tensor dimension 1 is
            the frequency axis; for a 2-D (freq, time) tensor it would be
            time -- confirm the layout callers use.
        num_masks: Number of bands to zero out.
        mask_factor: Maximum width of a single band, in bins.

    Returns:
        A masked copy; the input tensor is left untouched.
    """
    masked_spectrogram = spectrogram.clone()
    num_bins = spectrogram.size(1)

    # A band can never be wider than the axis itself.
    max_width = min(int(mask_factor), num_bins)
    if max_width <= 0:
        # Nothing to mask (also avoids randint with an empty range).
        return masked_spectrogram

    for _ in range(num_masks):
        # randint's upper bound is exclusive, hence the "+ 1" on both draws.
        width = int(torch.randint(0, max_width + 1, (1,)).item())
        start = int(torch.randint(0, num_bins - width + 1, (1,)).item())
        masked_spectrogram[:, start:start + width] = 0
    return masked_spectrogram

def dynamic_range_compression(audio, factor):
    """Pass ``audio`` through ``torchaudio.transforms.Vol`` built with ``factor``.

    NOTE(review): ``Vol`` is torchaudio's volume-adjustment transform, so this
    applies a gain rather than a true dynamic-range compressor -- confirm the
    function name matches the intent.

    Args:
        audio: Waveform tensor to process.
        factor: Gain value forwarded as the first argument of ``Vol``.

    Returns:
        The tensor returned by the ``Vol`` transform.
    """
    gain_transform = torchaudio.transforms.Vol(factor)
    return gain_transform(audio)

def time_warp(audio, warp_factor):
    """Randomly warp a signal along its last (time) dimension.

    torchaudio does not ship a ``TimeWarp`` transform, so the warp is done
    here with plain torch: a random anchor frame is moved by a random offset
    and the two segments on either side of it are linearly resampled so the
    overall length stays the same.

    Args:
        audio: Floating-point tensor whose last dimension is time, e.g. a
            (channels, samples) waveform or a (freq, time) spectrogram.
        warp_factor: Maximum displacement of the anchor as a fraction of the
            signal length. Values that leave no room on both sides of the
            anchor (roughly ``>= 0.5``), or that round to zero frames, leave
            the signal unchanged.

    Returns:
        A new tensor with the same shape and dtype as ``audio``.
    """
    num_frames = audio.size(-1)
    max_shift = int(abs(warp_factor) * num_frames)

    # Need at least one frame of displacement and room for the anchor to move
    # in both directions; otherwise return an untouched copy.
    if max_shift < 1 or num_frames - max_shift <= max_shift:
        return audio.clone()

    # Anchor stays max_shift away from both ends; the offset range is chosen so
    # the warped position always lies in [1, num_frames - 2].
    anchor = int(torch.randint(max_shift, num_frames - max_shift, (1,)).item())
    offset = int(torch.randint(-max_shift + 1, max_shift, (1,)).item())
    target = anchor + offset

    # interpolate(mode="linear") expects (batch, channels, length); fold every
    # leading dimension into the channel axis.
    flat = audio.reshape(1, -1, num_frames)
    left = torch.nn.functional.interpolate(
        flat[..., :anchor], size=target, mode="linear", align_corners=False
    )
    right = torch.nn.functional.interpolate(
        flat[..., anchor:], size=num_frames - target, mode="linear", align_corners=False
    )
    return torch.cat([left, right], dim=-1).reshape(audio.shape)


In [5]:

# Tunable settings for the augmentation helpers, collected in one mapping.
augmentation_params = dict(
    time_stretch_factor=1.1,
    pitch_shift_amount=3,
    noise_level=0.1,
    time_warp_factor=0.2,
    freq_masking=2,
    freq_mask_width=15,
)



# Hard-coded per-class counts (presumably the number of original samples in
# each class -- verify against the dataset).
class_distribution = dict([
    ("dog_barking", 640),
    ("car_horn", 344),
    ("Fart", 291),
    ("Guitar", 548),
    ("drilling", 560),
    ("Gunshot_and_gunfire", 448),
    ("Hi-hat", 171),
    ("Knock", 168),
    ("Splash_and_splatter", 174),
    ("Snare_drum", 449),
    ("Shatter", 212),
    ("Laughter", 295),
    ("siren", 560),
])

# Threshold for under-represented classes: currently the size of the largest
# class. Adjust to suit the dataset.
threshold = max(count for count in class_distribution.values())


In [6]:
import random
def augment_spectrograms(class_distribution, data_dir, output_dir, max_samples = 1000, time_mask_param = 80,
                         freq_mask_param = 80):
    """Create masked copies of mel spectrograms for classes short of ``max_samples``.

    Every sub-folder of ``data_dir`` is treated as one class. For a class with
    fewer than ``max_samples`` ``.npz`` files, a random subset of its files is
    loaded (array stored under the ``'mel_spec'`` key), passed through time
    masking and then frequency masking, and written to
    ``output_dir/<class>/augmented_<original name>``.

    Each source file is augmented at most once, so a class can at most double
    in size per call; it is not guaranteed to reach ``max_samples``.

    Args:
        class_distribution: Unused; kept so existing calls keep working.
        data_dir: Folder containing one sub-folder per class.
        output_dir: Folder that receives the augmented files. It may live
            inside ``data_dir``; it is then skipped while scanning for classes.
        max_samples: Target number of files per class.
        time_mask_param: Maximum time-mask length passed to ``T.TimeMasking``.
        freq_mask_param: Maximum frequency-mask length passed to
            ``T.FrequencyMasking``.
    """
    # Build the transforms once, honouring the caller-supplied parameters
    # (they used to be ignored in favour of a hard-coded 80).
    time_masking = T.TimeMasking(time_mask_param=time_mask_param)
    freq_masking = T.FrequencyMasking(freq_mask_param=freq_mask_param)

    output_root = os.path.abspath(output_dir)

    for class_folder in sorted(os.listdir(data_dir)):
        class_path = os.path.join(data_dir, class_folder)

        # Skip loose files and, when output_dir is nested in data_dir, the
        # output folder itself so earlier results are never re-augmented.
        if not os.path.isdir(class_path) or os.path.abspath(class_path) == output_root:
            continue

        # Only real .npz files are candidates; this keeps sub-folders and stray
        # non-spectrogram files (e.g. a leftover audio file) away from np.load.
        file_names = sorted(
            name for name in os.listdir(class_path)
            if name.endswith('.npz') and os.path.isfile(os.path.join(class_path, name))
        )
        num_files = len(file_names)
        additional_files_needed = max_samples - num_files

        if num_files == 0 or additional_files_needed <= 0:
            print(f"Skipping folder: {class_folder} ({num_files} files, nothing to augment)")
            continue

        print(f"Augmenting files in folder: {class_folder}")

        # Sampling is without replacement, so it is capped at the number of
        # files available.
        files_to_augment = random.sample(file_names, min(additional_files_needed, num_files))

        output_class_dir = os.path.join(output_dir, class_folder)
        os.makedirs(output_class_dir, exist_ok=True)

        for file_name in files_to_augment:
            file_path = os.path.join(class_path, file_name)

            # Context manager closes the underlying zip file after reading.
            with np.load(file_path) as archive:
                mel_spec = torch.from_numpy(archive['mel_spec'])

            # Work on a copy so the loaded tensor is never modified in place.
            augmented_spectogram = mel_spec.clone()
            augmented_spectogram = time_masking(augmented_spectogram)
            augmented_spectogram = freq_masking(augmented_spectogram)

            output_npz_path = os.path.join(output_class_dir, f"augmented_{file_name}")
            np.savez(output_npz_path, mel_spec=augmented_spectogram.numpy())

In [None]:
# Input root (one sub-folder per class) and the folder that collects the
# augmented spectrograms. Note that the output folder sits inside the input root.
data_dir = '/content/drive/My Drive/DLproject-Numpy'
output_dir = f"{data_dir}/augmented_spectograms"

# Make sure the output folder is there before anything is written to it.
os.makedirs(output_dir, exist_ok=True)

augment_spectrograms(class_distribution, data_dir=data_dir, output_dir=output_dir)

In [None]:
# MOVE ALL SPECTROGRAMS CREATED FROM AUGMENTED AUDIO FILES TO THE MAIN TRAIN FOLDER
import os
import shutil

def move_npz_files(source_dir, dest_dir):
    """Move every ``.npz`` file from ``source_dir/<class>`` to ``dest_dir/<class>``.

    Each sub-folder of ``source_dir`` is treated as a class folder. The
    matching folder under ``dest_dir`` is created when missing, and one line
    is printed per moved file. Entries of ``source_dir`` that are not
    directories are ignored (they used to abort the run with an error after a
    bogus destination folder had been created for them).

    NOTE(review): files are moved with ``shutil.move``; check how it treats a
    destination file that already exists if name clashes are possible.

    Args:
        source_dir: Folder containing one sub-folder per class.
        dest_dir: Folder that receives the per-class sub-folders.
    """
    for class_folder in sorted(os.listdir(source_dir)):
        source_class_path = os.path.join(source_dir, class_folder)

        # Only directories are class folders; skip loose files.
        if not os.path.isdir(source_class_path):
            continue

        dest_class_path = os.path.join(dest_dir, class_folder)
        os.makedirs(dest_class_path, exist_ok=True)

        for file_name in sorted(os.listdir(source_class_path)):
            if not file_name.endswith('.npz'):
                continue
            source_file_path = os.path.join(source_class_path, file_name)
            dest_file_path = os.path.join(dest_class_path, file_name)
            shutil.move(source_file_path, dest_file_path)
            print(f"Moved '{file_name}' to '{dest_class_path}'")

# Folder holding the spectrograms generated from the augmented audio files,
# and the training folder they are merged into. Edit both paths as needed.
source_directory = '/content/drive/My Drive/DLproject-Numpy/spectograms_from_augmented_audios/augmented_audios_final'
destination_directory = '/content/drive/My Drive/DLproject-Numpy/train'

# Relocate the .npz files class by class.
move_npz_files(source_dir=source_directory, dest_dir=destination_directory)


In [9]:
# COUNT NUMBER OF FILES IN EACH CLASS FOLDER
import os

def count_files_in_class_folders(dest_dir):
    """Return ``{folder name: number of regular files}`` for each sub-folder of ``dest_dir``.

    Entries of ``dest_dir`` that are not directories are ignored, and nested
    directories inside a class folder are not counted as files.
    """
    def _regular_file_total(folder):
        # Count only entries that are regular files directly inside `folder`.
        return sum(
            1
            for entry in os.listdir(folder)
            if os.path.isfile(os.path.join(folder, entry))
        )

    class_file_counts = {}
    for class_folder in os.listdir(dest_dir):
        class_folder_path = os.path.join(dest_dir, class_folder)
        if not os.path.isdir(class_folder_path):
            continue
        class_file_counts[class_folder] = _regular_file_total(class_folder_path)
    return class_file_counts

# Training root whose class sub-folders are tallied. Edit the path as needed.
destination_directory = '/content/drive/My Drive/DLproject-Numpy/train'

# Count the files per class folder, then report one line per class.
class_file_counts = count_files_in_class_folders(destination_directory)
for class_name in class_file_counts:
    count = class_file_counts[class_name]
    print(f"Class '{class_name}': {count} files")

Class 'Fart': 800 files
Class 'Guitar': 800 files
Class 'Gunshot_and_gunfire': 800 files
Class 'Hi-hat': 800 files
Class 'Knock': 800 files
Class 'Laughter': 798 files
Class 'Shatter': 800 files
Class 'Snare_drum': 800 files
Class 'Splash_and_splatter': 800 files
Class 'car_horn': 800 files
Class 'dog_barking': 800 files
Class 'drilling': 800 files
Class 'siren': 800 files
