In [None]:
import os
import numpy as np
import librosa
from google.colab import drive
import matplotlib.pyplot as plt
from PIL import Image
import cv2
# Mount Google Drive at /content/drive; the dataset and output paths used
# later in this notebook all live under that mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def compute_mel_spectrogram(audio_file_path, n_fft=2048, hop_length=512, n_mels=128):
    """Return the decibel-scaled mel spectrogram of an audio file.

    Args:
        audio_file_path: Path of the audio file to read.
        n_fft: FFT window length forwarded to librosa.
        hop_length: Number of samples between successive frames.
        n_mels: Number of mel bands to generate.

    Returns:
        The mel power spectrogram converted to dB, using the spectrogram's
        own maximum as the reference value.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    samples, sample_rate = librosa.load(audio_file_path, sr=None)

    power_spec = librosa.feature.melspectrogram(
        y=samples,
        sr=sample_rate,
        n_fft=n_fft,
        hop_length=hop_length,
        n_mels=n_mels,
    )
    return librosa.power_to_db(power_spec, ref=np.max)


def mono_to_color2(X):
    """Turn a single-channel array into a 3-channel, 8-bit image.

    The input is first expanded to three channels with OpenCV's
    GRAY2BGR conversion, then min-max rescaled into [0, 255] and
    emitted as unsigned 8-bit values.

    Args:
        X: Single-channel image/array accepted by ``cv2.cvtColor``.

    Returns:
        The rescaled three-channel image.
    """
    three_channel = cv2.cvtColor(X, cv2.COLOR_GRAY2BGR)

    # Passing None as dst lets OpenCV allocate the output array.
    return cv2.normalize(
        three_channel,
        None,
        alpha=0,
        beta=255,
        norm_type=cv2.NORM_MINMAX,
        dtype=cv2.CV_8U,
    )

def mono_to_color(X, mean=None, std=None, norm_max=None, norm_min=None, eps=1e-6):
    """Standardise a single-channel array and convert it to a 3-channel uint8 image.

    The input is replicated along a new last axis, shifted by ``mean``,
    divided by ``std + eps``, clipped to ``[norm_min, norm_max]`` and finally
    rescaled linearly to ``[0, 255]``.

    Args:
        X: Single-channel array (array-like accepted by ``np.stack``).
        mean: Value subtracted from the data. ``None`` means "use the data mean".
        std: Divisor for standardisation. ``None`` means "use the data std".
        norm_max: Upper clipping bound in standardised units. ``None`` means
            "use the standardised maximum".
        norm_min: Lower clipping bound in standardised units. ``None`` means
            "use the standardised minimum".
        eps: Small constant added to ``std`` and used as the threshold below
            which the data is considered constant.

    Returns:
        ``np.uint8`` array with a trailing axis of size 3. All zeros when the
        standardised data is (near-)constant or the clipping range is empty.
    """
    X = np.stack([X, X, X], axis=-1)

    # Compare against None explicitly: `mean or X.mean()` would silently
    # discard legitimate falsy arguments such as mean=0 or norm_min=0.
    if mean is None:
        mean = X.mean()
    X = X - mean
    if std is None:
        std = X.std()
    Xstd = X / (std + eps)

    _min, _max = Xstd.min(), Xstd.max()
    if norm_max is None:
        norm_max = _max
    if norm_min is None:
        norm_min = _min

    # The second condition guards the division below against an empty or
    # inverted caller-supplied range, which would otherwise yield NaN/inf.
    if (_max - _min) > eps and norm_max > norm_min:
        V = np.clip(Xstd, norm_min, norm_max)
        V = 255 * (V - norm_min) / (norm_max - norm_min)
        return V.astype(np.uint8)

    return np.zeros_like(Xstd, dtype=np.uint8)

In [None]:
def save_mel_spectrogram_as_image(mel_spec, output_image_path, hop_length=512):
    """Render a mel spectrogram to an image file with no axes and no color bar.

    Args:
        mel_spec: Spectrogram array to draw with ``librosa.display.specshow``.
        output_image_path: Destination path handed to ``savefig``.
        hop_length: Hop length forwarded to ``specshow`` (defaults to 512,
            matching the default of ``compute_mel_spectrogram``).
    """
    # Import the submodule explicitly: older librosa releases do not expose
    # ``librosa.display`` after a bare ``import librosa``.
    import librosa.display

    fig = plt.figure(figsize=(10, 4))
    try:
        librosa.display.specshow(mel_spec, hop_length=hop_length, x_axis=None, y_axis=None)
        plt.axis('off')  # Hide axes, ticks and frame

        # No plt.colorbar() call here: calling it would *draw* a color bar
        # into the saved image, which is the opposite of what is wanted.

        # Save the spectrogram cropped tightly, with no padding
        fig.savefig(output_image_path, bbox_inches='tight', pad_inches=0, transparent=True)
    finally:
        # Always release the figure, even if plotting or saving failed,
        # so repeated calls in a loop do not accumulate open figures.
        plt.close(fig)

In [None]:
def process_label_folder(label_directory, output_label_directory):
    """Convert every entry of one label folder into a saved spectrogram array.

    For each name listed in ``label_directory`` the mel spectrogram is
    computed, converted to three channels, and written to
    ``output_label_directory`` as ``<stem>.npz`` under the key ``mel_spec``.
    Any failure on an entry is reported and the loop moves on to the next one.

    Args:
        label_directory: Folder containing the audio files of one label.
        output_label_directory: Folder receiving the ``.npz`` files.
    """
    for audio_file in os.listdir(label_directory):
        source_path = os.path.join(label_directory, audio_file)

        try:
            # Spectrogram -> 3-channel image (better suited as CNN input)
            features = mono_to_color2(compute_mel_spectrogram(source_path))

            # Same base name as the audio file, with an .npz extension
            stem, _extension = os.path.splitext(audio_file)
            output_npz_path = os.path.join(output_label_directory, stem + '.npz')
            np.savez(output_npz_path, mel_spec=features)

            print(f'Saved mel spectrogram array: {output_npz_path}')
        except Exception as err:
            # Best effort: report the problem and keep going with the next file
            print(f'Error processing {audio_file}: {str(err)}')



In [None]:
def process_audio_dataset(root_directory, output_directory):
    """Process every label folder of the 'train' and 'val' splits.

    The ``<split>/<label>`` layout found under ``root_directory`` is mirrored
    under ``output_directory``; each label folder is then handed to
    ``process_label_folder``.

    Args:
        root_directory: Dataset root containing 'train' and 'val' subfolders.
        output_directory: Root folder under which results are written.
    """
    for split_name in ('train', 'val'):
        split_directory = os.path.join(root_directory, split_name)

        # Every entry of the split folder is treated as one label
        for label in os.listdir(split_directory):
            # Make sure the mirrored output folder exists before processing
            output_label_directory = os.path.join(output_directory, split_name, label)
            os.makedirs(output_label_directory, exist_ok=True)

            process_label_folder(
                os.path.join(split_directory, label),
                output_label_directory,
            )


In [None]:
def save_spectograms_as_arrays(
    root_directory='/content/drive/My Drive/audio_dataset/audio_dataset',
    output_directory='/content/drive/My Drive/DLproject-Numpy/',
):
    """Convert the audio dataset into saved mel spectrogram arrays.

    Args:
        root_directory: Dataset root holding the 'train' and 'val' folders.
            Defaults to the dataset location on the mounted Google Drive.
        output_directory: Root folder where the spectrogram arrays are
            written. Defaults to the project folder on the mounted Drive.
    """
    # Both paths default to the original hard-coded locations, so calling
    # the function with no arguments behaves exactly as before.
    process_audio_dataset(root_directory, output_directory)


In [None]:
def load_and_display_npz(npz_file_path):
    """Load the ``mel_spec`` array from an .npz file, plot it, and return it.

    Args:
        npz_file_path: Path of an .npz archive containing a ``mel_spec`` entry.

    Returns:
        The array stored under the ``mel_spec`` key.
    """
    # np.load on an .npz returns an archive object that keeps the file open;
    # the context manager closes it once the array has been read.
    with np.load(npz_file_path) as data:
        mel_spec = data['mel_spec']

    # Display the mel spectrogram
    plt.figure(figsize=(10, 4))
    plt.imshow(mel_spec)
    plt.title('Mel Spectrogram')
    plt.xlabel('Time')
    plt.ylabel('Frequency')
    plt.colorbar(label='Amplitude (dB)')
    plt.show()

    return mel_spec



In [None]:
# Example usage: load one saved spectrogram array from the output folder,
# plot it, and keep the returned array in `mel_spec`.
npz_file_path = '/content/drive/My Drive/DLproject-Numpy/train/Laughter/Laughter_120.npz'
mel_spec = load_and_display_npz(npz_file_path)