<a href="https://colab.research.google.com/github/wldbsdl/elecourse-2024-summer-preprocessing/blob/preprocessing/%EC%A0%84%EC%B2%98%EB%A6%AC_4(%EC%A7%84%EC%A7%9C_%EC%B5%9C%EC%A2%85_processed_audio_%EC%9E%90%EB%8F%99_%EC%82%AD%EC%A0%9C).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install necessary packages
!pip install pydub
!apt-get install ffmpeg
!pip install librosa matplotlib numpy

# Import required libraries
from google.colab import drive
import librosa
import numpy as np
import matplotlib.pyplot as plt
import os
import shutil
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from google.colab import files

# Two-digit emotion codes paired with their human-readable labels.
# The code is looked up from the third dash-separated field of a file name.
emotion_map = dict(
    zip(
        ('01', '02', '03', '04', '05', '06', '07', '08'),
        (
            'neutral',
            'calm',
            'happy',
            'sad',
            'angry',
            'fearful',
            'disgust',
            'surprised',
        ),
    )
)

# Function to extract features from audio files and pad/truncate to a fixed length
def extract_features(audio, sr=22050, feature_type='mfcc', num_features=40, delta=False, max_pad_len=431):
    """Compute a fixed-width feature matrix for one audio signal.

    Args:
        audio: Sample array passed to librosa as ``y``.
        sr: Sampling rate forwarded to the MFCC and mel extractors (the STFT
            branch does not use it).
        feature_type: One of ``'mfcc'``, ``'mel'`` or ``'stft'``.
        num_features: Used as ``n_mfcc`` / ``n_mels``; ignored for ``'stft'``,
            which always uses ``n_fft=512``.
        delta: When True, first- and second-order deltas are stacked below the
            base features along axis 0, tripling the number of rows.
        max_pad_len: Number of columns in the result; longer inputs are
            truncated, shorter ones are zero-padded on the right.

    Returns:
        A 2-D array with exactly ``max_pad_len`` columns, or ``None`` when the
        signal is missing/empty, the feature type is unsupported, or the
        extraction raises.
    """
    if audio is None or len(audio) == 0:
        print("Empty audio signal encountered.")
        return None

    # Reject unknown types up front instead of relying on an unbound-local
    # error being swallowed by the broad handler below.
    if feature_type not in ('mfcc', 'mel', 'stft'):
        print(f"Unsupported feature type: {feature_type}")
        return None

    try:
        if feature_type == 'mfcc':
            features = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=num_features)
        elif feature_type == 'mel':
            features = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=num_features)
        else:  # 'stft'
            features = np.abs(librosa.stft(y=audio, n_fft=512))

        if delta:
            delta_feat = librosa.feature.delta(features)
            delta_delta_feat = librosa.feature.delta(features, order=2)
            features = np.concatenate((features, delta_feat, delta_delta_feat), axis=0)

        # Bound extreme magnitudes (including +/-inf) before padding.
        features = np.clip(features, a_min=-1e10, a_max=1e10)

        if features.shape[1] > max_pad_len:
            features = features[:, :max_pad_len]
        else:
            pad_width = max_pad_len - features.shape[1]
            features = np.pad(features, ((0, 0), (0, pad_width)), mode='constant')

        # NaNs pass through np.clip unchanged, so they are zeroed here.
        features = np.nan_to_num(features)

        return features
    except Exception as e:
        # Best-effort boundary: one bad file should not abort a whole batch.
        print(f"Error extracting features: {e}")
        return None

# Function to load data using a list of audio files
def load_data(audio_files, feature_types=('mfcc', 'mel', 'stft'), num_features=40, delta=False, max_pad_len=431):
    """Load audio files and extract every requested feature type for each.

    A file contributes to the result only when *all* requested feature types
    were extracted successfully, so every feature array stays index-aligned
    with the label array.

    Args:
        audio_files: Iterable of audio file paths. The label is taken from the
            third dash-separated field of each file name.
        feature_types: Feature type names forwarded to ``extract_features``.
        num_features: Forwarded to ``extract_features``.
        delta: Forwarded to ``extract_features``.
        max_pad_len: Forwarded to ``extract_features``.

    Returns:
        A tuple ``(data, labels)`` where ``data`` maps each feature type to an
        array of the per-file feature matrices and ``labels`` is an array of
        label strings. Missing, empty, or failing files are reported on stdout
        and skipped.
    """
    feature_types = list(feature_types)
    data_dict = {feature_type: [] for feature_type in feature_types}
    labels_list = []
    for file_path in audio_files:
        try:
            if not os.path.exists(file_path):
                print(f'File does not exist: {file_path}')
                continue
            label = os.path.basename(file_path).split('-')[2]
            # sr=None keeps the file's native sampling rate.
            audio, sr = librosa.load(file_path, sr=None)
            if len(audio) == 0:
                print(f'Empty audio file: {file_path}')
                continue

            # Stage the features first; commit only if every type succeeded.
            extracted = []
            for feature_type in feature_types:
                features = extract_features(audio, sr=sr, feature_type=feature_type, num_features=num_features, delta=delta, max_pad_len=max_pad_len)
                if features is None or features.shape[1] == 0:
                    print(f'Skipping {file_path}: could not extract {feature_type} features')
                    break
                extracted.append((feature_type, features))
            else:
                for feature_type, features in extracted:
                    data_dict[feature_type].append(features)
                labels_list.append(label)
        except Exception as e:
            # Best-effort: report the failing file and keep going.
            print(f'Error processing {file_path}: {e}')
    return {feature_type: np.array(data_dict[feature_type]) for feature_type in feature_types}, np.array(labels_list)

# Function to plot and save a spectrogram
def plot_spectrogram(spectrogram, label, save_path=None):
    """Render a feature matrix as a dB-scaled image and optionally save it.

    Args:
        spectrogram: 2-D array, or 3-D array whose first slice along the last
            axis (``[:, :, 0]``) is plotted.
        label: Text shown in the figure title.
        save_path: When truthy, the figure is written there and then closed;
            otherwise the figure is left open for the caller.

    Raises:
        ValueError: If ``spectrogram`` is neither 2-D nor 3-D.
    """
    # Validate before creating a figure so a bad input leaves nothing open.
    if spectrogram.ndim == 2:
        image = spectrogram
    elif spectrogram.ndim == 3:
        image = spectrogram[:, :, 0]
    else:
        raise ValueError(f"Expected a 2-D or 3-D spectrogram, got {spectrogram.ndim}-D")

    fig = plt.figure(figsize=(10, 4))
    try:
        # NOTE(review): power_to_db is applied whatever the feature type is;
        # confirm this is intended for inputs that are not power spectra.
        plt.imshow(librosa.power_to_db(image, ref=np.max), aspect='auto', origin='lower', cmap='viridis')
        plt.title(f'Label: {label}')
        plt.ylabel('Frequency bins')
        plt.xlabel('Time frames')
        plt.colorbar(format='%+2.0f dB')
        if save_path:
            plt.savefig(save_path)
    except Exception:
        # Do not leak the figure when plotting or saving fails.
        plt.close(fig)
        raise

    if save_path:
        plt.close(fig)
        print(f"Saved spectrogram to {save_path}")

# Function to get all audio files including those in subdirectories
def get_audio_files(root_dir):
    """Recursively collect the paths of all WAV files under ``root_dir``.

    The extension match is case-insensitive (``.wav``, ``.WAV``, ...), and the
    result is sorted so that repeated runs process files in the same order
    regardless of the directory listing order.

    Args:
        root_dir: Directory to search; a missing directory yields ``[]``.

    Returns:
        Sorted list of file paths joined onto the directory they were found in.
    """
    audio_files = []
    # ``filenames`` (not ``files``) avoids shadowing the google.colab module
    # imported under that name at the top of the file.
    for subdir, _, filenames in os.walk(root_dir):
        audio_files.extend(
            os.path.join(subdir, filename)
            for filename in filenames
            if filename.lower().endswith('.wav')
        )
    audio_files.sort()
    return audio_files

# Function to generate and save spectrogram images for all audio files
def generate_spectrograms_for_all_files(data_dict, output_dir, feature_types, file_paths=None):
    """Save one spectrogram image per (feature type, audio file) pair.

    Images are written to ``output_dir`` as ``<file stem>_<FEATURE>.png`` and
    titled with the emotion decoded from the third dash-separated field of
    the file name (``'Unknown'`` when the code is not in ``emotion_map``).

    Args:
        data_dict: Maps each feature type to an indexable collection of
            per-file arrays; each entry must have a leading length-1 axis,
            which is squeezed away before plotting.
        output_dir: Existing directory the PNG files are written to.
        feature_types: Feature type names to render.
        file_paths: Audio file paths, assumed to be in the same order as the
            entries of ``data_dict``. Defaults to the module-level
            ``audio_files`` list.

    NOTE(review): entry ``i`` is paired with path ``i`` purely by position;
    confirm that no file was skipped while the features were loaded.
    """
    if file_paths is None:
        # Fall back to the module-level list so three-argument calls keep working.
        file_paths = audio_files

    for feature_type in feature_types:
        for idx, file_path in enumerate(file_paths):
            try:
                # Paths without a matching feature entry are silently skipped.
                if idx >= len(data_dict[feature_type]):
                    continue
                base_name = os.path.basename(file_path)
                # splitext keeps dots inside the stem, avoiding name collisions.
                file_name = os.path.splitext(base_name)[0]
                emotion = emotion_map.get(base_name.split('-')[2], 'Unknown')
                save_path = os.path.join(output_dir, f"{file_name}_{feature_type.upper()}.png")
                print(f"Generating image for file {file_name} and feature type {feature_type}.")
                plot_spectrogram(data_dict[feature_type][idx].squeeze(axis=0), emotion, save_path=save_path)
            except Exception as e:
                # Best-effort: report the failing file and continue with the rest.
                print(f"Error generating spectrogram for {file_path}: {e}")

# Driver section: mount Drive, rebuild the output folder, extract features,
# render spectrogram images, then zip and download them.

# Mount Google Drive so the input/output folders below are reachable
drive.mount('/content/drive')

# Google Drive path setup
input_dir = '/content/drive/MyDrive/ELECOURSE/audio_files'  # Path where audio files are stored
output_dir = '/content/drive/MyDrive/ELECOURSE/processed_audio'  # Path to save processed files

# Remove existing processed_audio folder if it exists
# (destructive: everything left from a previous run is deleted)
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)

# Create new processed_audio folder
os.makedirs(output_dir)

# Define audio file list (searched recursively under input_dir)
audio_files = get_audio_files(input_dir)

# Check audio file paths
# NOTE: an empty list is only reported; execution continues either way.
if not audio_files:
    print("No audio files found in the specified directory.")
else:
    print(f"Found {len(audio_files)} audio files.")

# Load and preprocess data (delta=True stacks delta and delta-delta features)
feature_types = ['mfcc', 'mel', 'stft']
data_dict, y = load_data(audio_files, feature_types=feature_types, num_features=40, delta=True)

# Skip post-processing when every feature array is empty or there are no labels
if all(data.size == 0 for data in data_dict.values()) or y.size == 0:
    print("No data loaded. Please check the feature extraction process.")
else:
    for feature_type, X in data_dict.items():
        print(f"Loaded X shape for {feature_type}: {X.shape}")
    print(f"Loaded y shape: {y.shape}")

    # Label encoding
    # NOTE(review): y_encoded is not used again in this cell.
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)

    # Add two singleton axes to every feature array: a trailing channel axis and
    # a length-1 axis at position 1. For the 3-D arrays printed above this
    # yields a 5-D array. Only existing keys are reassigned, so the dict size
    # does not change while iterating.
    for feature_type, X in data_dict.items():
        X = X[..., np.newaxis]  # Append a channel axis as the last dimension
        X = np.expand_dims(X, axis=1)  # Insert a length-1 axis at position 1 (squeezed off per sample when plotting)
        data_dict[feature_type] = X

    # Generate spectrogram images for all audio files
    generate_spectrograms_for_all_files(data_dict, output_dir, feature_types)

# User input for zip file name
zip_filename = input("Enter the filename for the zip file (without extension): ")

# Compress and download image files
# NOTE: this runs even when no data was loaded above; it archives whatever is
# currently in output_dir.
shutil.make_archive(zip_filename, 'zip', output_dir)
files.download(f"{zip_filename}.zip")


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Found 192 audio files.
Loaded X shape for mfcc: (192, 120, 431)
Loaded X shape for mel: (192, 120, 431)
Loaded X shape for stft: (192, 771, 431)
Loaded y shape: (192,)
Generating image for file 03-01-05-01-01-01-19 and feature type mfcc.
Saved spectrogram to /content/drive/MyDrive/ELECOURSE/processed_audio/03-01-05-01-01-01-19_MFCC.png
Generating image for file 03-01-05-01-01-01-17 and feature type mfcc.
Saved spectrogram to /content/drive/MyDrive/ELECOURSE/processed_audio/03-01-05-01-01-01-17_MFCC.png
Generating image for file 03-01-05-01-01-01-18 and feature type mfcc.
Saved spectrogram to /content/drive/MyDrive/ELECOURSE/processed_aud

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>