<a href="https://colab.research.google.com/github/wasef-c/emotion_rec/blob/main/DATA_PRE_PROCESSING.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**DATASET COLLECTION**

The code below was used to extract data from the following datasets:

Toronto Emotional Speech Set Data [(TESS)](https://www.kaggle.com/datasets/ejlok1/toronto-emotional-speech-set-tess)

Ryerson Audio-Visual Database of Emotional Speech and Song [(RAVDESS)](https://www.kaggle.com/datasets/uwrfkaggler/ravdess-emotional-speech-audio)

Crowd Sourced Emotional Multimodal Actors Dataset [(CREMA-D)](https://www.kaggle.com/datasets/ejlok1/cremad)

Interactive Emotional Dyadic Motion Capture [(IEMOCAP)](https://www.kaggle.com/datasets/columbine/iemocap)



In [None]:
# Dependencies
import numpy as np
import pandas as pd
import os
import librosa
import matplotlib.pyplot as plt
import gc
import time
from tqdm import tqdm, tqdm_notebook; tqdm.pandas() # Progress bar
from sklearn.metrics import label_ranking_average_precision_score
from sklearn.model_selection import train_test_split

# Machine Learning
import tensorflow as tf
from keras import backend as K
from tensorflow.keras.layers import Layer, InputSpec

# from keras.engine.topology import Layer
from keras import initializers, regularizers, constraints, optimizers, layers

from tensorflow.keras.layers import (Dense, Bidirectional, ELU,
                          Dropout, LeakyReLU, Conv1D, BatchNormalization)
from keras.models import Sequential
# from keras.optimizers import Adam
from keras.callbacks import EarlyStopping


# Set seeds for reproducibility (NumPy and TensorFlow global random generators)
seed = 1234
np.random.seed(seed)
tf.random.set_seed(seed)

# Wall-clock timestamp taken when this cell runs
t_start = time.time()
import matplotlib.pyplot as plt


 **TESS DATA COLLECTION**


In [None]:
#TESS

import os

# Path to your directory containing audio files
# Change to your respective directory after downloading TESS or using kaggle to import
# (raw string: the Windows backslashes must not be parsed as escape sequences)
directory_path = r'D:\Documents\MASC\Emo_rec_001\TESS Toronto emotional speech set data'

# Accumulators filled by traverse_directories(); index i of every list
# describes the same audio file.
T_filtered_filenames = []  # full path of each kept file
T_emotion_labels = []      # emotion name parsed from the file name
T_labels = []              # numeric label id (see Map2Num)
T_fname = []               # bare file name

# Label id -> emotion name (same spelling as the other dataset cells)
EMOTIONS  = {
    0: 'neutral',
    1: 'happy',
    2: 'sad',
    3: 'angry',
    4: 'fear',
    5: 'disgust',
}

# Emotion token found in the file name -> label id
Map2Num = {
    'neutral': 0,
    'happy': 1,
    'sad': 2,
    'angry': 3,
    'fear': 4,
    'disgust': 5
}


def traverse_directories(directory):
    '''
    Walk `directory` recursively and record every usable TESS recording.

    The emotion is the third '_'-separated field of the file name
    (e.g. 'OAF_back_angry.wav' -> 'angry'), compared case-insensitively.
    For each kept file the full path, file name, label id and emotion name
    are appended to T_filtered_filenames, T_fname, T_labels and
    T_emotion_labels respectively.

    Skipped files:
      * anything that is not a .wav file or has fewer than three '_' fields
        (these used to raise IndexError);
      * files whose emotion token is not a key of Map2Num. This covers the
        'ps' recordings that were already excluded, and keeps the string
        'Unknown' out of the numeric label list.

    Directories and files are visited in sorted order so the resulting lists
    (and any seeded split made from them) do not depend on the file system.
    '''
    for root, dirs, files in os.walk(directory):
        dirs.sort()  # in-place sort steers os.walk's traversal order
        for filename in sorted(files):
            if not filename.lower().endswith('.wav'):
                continue
            parts = filename.split('_')
            if len(parts) < 3:
                continue
            # third field, without the extension
            T_emotion = parts[2].split('.')[0].lower()
            T_label = Map2Num.get(T_emotion)
            if T_label is None:
                continue
            T_filtered_filenames.append(os.path.join(root, filename))
            T_fname.append(filename)
            T_labels.append(T_label)
            T_emotion_labels.append(T_emotion)

# Traverse through all subdirectories and collect the TESS recordings
traverse_directories(directory_path)

**CREMA-D**

In [None]:
## CREMA
# Path to your directory containing audio files
# (raw string: the Windows backslashes must not be parsed as escape sequences)
directory_path = r'D:\Documents\MASC\Emo_rec_001\CREMA-D\AudioWAV'


# Accumulators filled by traverse_directories2(); index i of every list
# describes the same audio file.
C_filenames = []           # not filled by this cell; kept for any cell that may refer to it
C_emotion_labels = []      # emotion name looked up in EMOTIONS
C_labels = []              # numeric label id (see num_mapping)
C_fname = []               # bare file name
C_filtered_filenames = []  # full path of each kept file

# Label id -> emotion name
EMOTIONS  = {
    0: 'neutral',
    1: 'happy',
    2: 'sad',
    3: 'angry',
    4: 'fear',
    5: 'disgust',
}


# Emotion code found in the file name -> label id
num_mapping = {
    'NEU': 0,
    'HAP': 1,
    'SAD': 2,
    'ANG': 3,
    'FEA': 4,
    'DIS': 5
}


def traverse_directories2(directory):
    '''
    Walk `directory` recursively and record every usable CREMA-D recording.

    The emotion code is the third '_'-separated field of the file name
    (e.g. '1001_DFA_ANG_XX.wav' -> 'ANG'), compared case-insensitively.
    For each kept file the full path, file name, label id and emotion name
    are appended to C_filtered_filenames, C_fname, C_labels and
    C_emotion_labels respectively.

    Skipped files:
      * anything that is not a .wav file or has fewer than three '_' fields
        (these used to raise IndexError);
      * files whose code is not a key of num_mapping (these used to be added
        with the string 'Unknown' as their label).

    Directories and files are visited in sorted order so the resulting lists
    (and any seeded split made from them) do not depend on the file system.
    '''
    for root, dirs, files in os.walk(directory):
        dirs.sort()  # in-place sort steers os.walk's traversal order
        for filename in sorted(files):
            stem, extension = os.path.splitext(filename)
            if extension.lower() != '.wav':
                continue
            parts = stem.split('_')
            if len(parts) < 3:
                continue
            C_label = num_mapping.get(parts[2].upper())
            if C_label is None:
                continue
            C_filtered_filenames.append(os.path.join(root, filename))
            C_fname.append(filename)
            C_labels.append(C_label)
            C_emotion_labels.append(EMOTIONS.get(C_label, 'Unknown'))

# Traverse through all subdirectories and collect the CREMA-D recordings
traverse_directories2(directory_path)

**RAVDESS**

In [None]:
## RAVDESS
# Path to your directory containing audio files
# (raw string: the Windows backslashes must not be parsed as escape sequences)
directory_path = r'D:\Documents\MASC\Emo_rec_001\RAVDESS'

# Accumulators filled by traverse_directories3(); index i of every list
# describes the same audio file.
R_filenames = []           # not filled by this cell; kept for any cell that may refer to it
R_emotion_labels = []      # emotion name looked up in EMOTIONS
R_labels = []              # numeric label id
R_fname = []               # bare file name
R_filtered_filenames = []  # full path of each kept file

# Label id -> emotion name
EMOTIONS  = {
    0: 'neutral',
    1: 'happy',
    2: 'sad',
    3: 'angry',
    4: 'fear',
    5: 'disgust',
}

# Emotion code (third '-'-separated field of the file name) -> label id.
# Codes 1 and 2 both map to label 0, and code 8 has no entry, so those files
# are excluded; this is the same mapping the previous arithmetic produced.
# NOTE(review): presumably code 2 is 'calm' and code 8 is 'surprised' in the
# RAVDESS naming scheme - confirm against the dataset documentation.
RAVDESS_CODE_TO_LABEL = {1: 0, 2: 0, 3: 1, 4: 2, 5: 3, 6: 4, 7: 5}


def traverse_directories3(directory):
    '''
    Walk `directory` recursively and record every usable RAVDESS recording.

    The numeric emotion code is the third '-'-separated field of the file
    name (e.g. '03-01-05-01-01-01-01.wav' -> 5) and is translated with
    RAVDESS_CODE_TO_LABEL. For each kept file the full path, file name,
    label id and emotion name are appended to R_filtered_filenames, R_fname,
    R_labels and R_emotion_labels respectively.

    Skipped files:
      * anything that is not a .wav file, has fewer than three '-' fields or
        a non-numeric third field (these used to raise IndexError/ValueError);
      * files whose code is not in RAVDESS_CODE_TO_LABEL (code 8 as before;
        other out-of-range codes used to be added with the name 'Unknown').

    Directories and files are visited in sorted order so the resulting lists
    (and any seeded split made from them) do not depend on the file system.
    '''
    for root, dirs, files in os.walk(directory):
        dirs.sort()  # in-place sort steers os.walk's traversal order
        for filename in sorted(files):
            stem, extension = os.path.splitext(filename)
            if extension.lower() != '.wav':
                continue
            parts = stem.split('-')
            if len(parts) < 3 or not parts[2].isdigit():
                continue
            R_val = RAVDESS_CODE_TO_LABEL.get(int(parts[2]))
            if R_val is None:
                continue
            R_filtered_filenames.append(os.path.join(root, filename))
            R_fname.append(filename)
            R_labels.append(R_val)
            R_emotion_labels.append(EMOTIONS.get(R_val, 'Unknown'))


# Traverse through all subdirectories and collect the RAVDESS recordings
traverse_directories3(directory_path)



**IEMOCAP**

In [None]:
import os
import glob
import numpy as np

''' angry, happy, sad, neutral, frustrated, excited, fearful, disgusted, excited, other	'''

# Label id -> emotion name for the six classes kept from IEMOCAP.
# Further categories (surprised, excited, pleasure, pain, disappointment)
# are deliberately left out of the mapping.
EMOTIONS = dict(enumerate(['neutral', 'happy', 'sad', 'angry', 'fear', 'disgust']))
class CaseInsensitiveDict(dict):
    '''
    dict whose string keys are looked up case-insensitively.

    Keys must be *stored* in lower case; only the lookup side is folded.
    Besides `d[key]`, the `get()` and `in` operations are overridden as well:
    the built-in dict versions do not go through `__getitem__`, so without
    these overrides `d.get('Neu')` would miss the stored key 'neu'.
    Non-string keys are looked up unchanged.
    '''

    @staticmethod
    def _fold(key):
        # Lower-case strings, leave every other key type untouched
        return key.lower() if isinstance(key, str) else key

    def __getitem__(self, key):
        return super().__getitem__(self._fold(key))

    def get(self, key, default=None):
        return super().get(self._fold(key), default)

    def __contains__(self, key):
        return super().__contains__(self._fold(key))

# IEMOCAP emotion code (three letters, stored in lower case) -> label id.
# The ids match the keys of EMOTIONS above. Codes without an entry here are
# not given a label by the extraction function below (it looks codes up with
# IEM.get and drops the file when nothing is found).
IEM = CaseInsensitiveDict({
    'neu': 0,
    'hap': 1,
    'sad': 2,
    'ang': 3,
    'fea': 4,
    'dis': 5,
    # 'exc': 1,
    # anxious, apologetic, assertive, concerned, encouraging, excited
})

# Root of the extracted IEMOCAP release. Each of the five sessions keeps its
# .wav files under <root>\SessionN\sentences\wav
iemocap_root = r'D:\Documents\MASC\Emo_rec_001\IEMOCAP_DATA\IEMOCAP_full_release_withoutVideos.tar\IEMOCAP_full_release'
sessions = [
    rf'{iemocap_root}\Session{session_number}\sentences\wav'
    for session_number in range(1, 6)
]

# Function to extract emotion labels, file directories, and file names for multiple sessions
def _find_emotion_code(lines, utterance_name):
    '''
    Return the emotion code recorded for `utterance_name` in the lines of one
    EmoEvaluation file, or None when the utterance is not listed.

    The first line containing the utterance name is used; its third
    tab-separated field is the emotion code. When that code is 'xxx', the
    first three characters of the second tab-separated field of the line two
    below are used instead, if such a line exists.
    NOTE(review): "two below" reproduces the previous indexing
    (line_index += 1, then lines[line_index + 1]); confirm that this line,
    rather than the one directly below, is the intended fallback.
    '''
    for line_index, line in enumerate(lines):
        if utterance_name not in line:
            continue
        fields = line.split('\t')
        if len(fields) < 3:
            # mentions the utterance but is not a tab-separated record
            continue
        emotion = fields[2].strip()
        if emotion == 'xxx':
            fallback_index = line_index + 2
            if fallback_index < len(lines):
                fallback_fields = lines[fallback_index].split('\t')
                if len(fallback_fields) > 1:
                    emotion = fallback_fields[1].strip()[0:3]
        return emotion
    return None


def extract_emotion_labels_and_files_for_sessions(sessions, label_map=None, emotion_names=None):
    '''
    Collect the labelled .wav files of several IEMOCAP sessions.

    Parameters
    ----------
    sessions : iterable of str
        Directories laid out as <session>/sentences/wav. The matching
        evaluation files are looked up in <session>/dialog/EmoEvaluation,
        one '<dialog>.txt' per sub-directory of wav files.
    label_map : mapping, optional
        Lower-case emotion code -> label id. Defaults to the global IEM.
    emotion_names : mapping, optional
        Label id -> emotion name. Defaults to the global EMOTIONS.

    Returns
    -------
    (labels, paths, file_names, emotions) : four index-aligned lists with the
    label id, full .wav path, .wav file name and emotion name of every file
    whose emotion code is present in `label_map`. Files without an evaluation
    file, without an entry in it, or with an unmapped code are left out.

    Notes
    -----
    * Emotion codes are lower-cased before the lookup, so capitalised codes
      (e.g. 'Ang') match their lower-case keys.
    * Each evaluation file is read once and reused for all its utterances.
    * The evaluation directory is derived with os.path instead of replacing a
      backslash-separated substring, so '/'-separated paths work too.
    * .wav files are processed in sorted order to make the output repeatable.
    '''
    if label_map is None:
        label_map = IEM
    if emotion_names is None:
        emotion_names = EMOTIONS

    all_labels = []
    all_directories = []
    all_file_names = []
    all_emos = []
    eval_lines_cache = {}  # evaluation file path -> its lines (None if missing)

    for session_dir in sessions:
        # <session>/sentences/wav -> <session>/dialog/EmoEvaluation
        session_root = os.path.dirname(os.path.dirname(os.path.normpath(session_dir)))
        eval_dir = os.path.join(session_root, 'dialog', 'EmoEvaluation')

        wav_files = sorted(glob.glob(os.path.join(session_dir, '**', '*.wav'), recursive=True))
        for wav_file in wav_files:
            file_directory = os.path.dirname(wav_file)
            dialog_id = os.path.basename(file_directory)
            emo_eval_file = os.path.join(eval_dir, f'{dialog_id}.txt')

            if emo_eval_file not in eval_lines_cache:
                if os.path.exists(emo_eval_file):
                    with open(emo_eval_file, 'r') as emo_file:
                        eval_lines_cache[emo_eval_file] = emo_file.readlines()
                else:
                    eval_lines_cache[emo_eval_file] = None
            lines = eval_lines_cache[emo_eval_file]
            if lines is None:
                continue

            file_name_with_extension = os.path.basename(wav_file)
            file_name = os.path.splitext(file_name_with_extension)[0]  # name without extension

            emotion = _find_emotion_code(lines, file_name)
            if emotion is None:
                continue
            lab = label_map.get(emotion.lower())
            if lab is None:
                continue

            all_labels.append(lab)
            all_directories.append(os.path.join(file_directory, file_name_with_extension))
            all_file_names.append(file_name_with_extension)
            all_emos.append(emotion_names.get(lab, 'unknown'))

    return all_labels, all_directories, all_file_names, all_emos

# Run the extraction over all sessions. The four returned lists are
# index-aligned: label id, full .wav path, .wav file name, emotion name.
emotion_labels, wav_file_directories, wav_file_names, all_emos = extract_emotion_labels_and_files_for_sessions(sessions)





**COMBINE DATASETS**

In [None]:
# One (full paths, file names, label ids, emotion names) tuple per corpus, in
# the order the rows appear in the combined DataFrame.
corpus_parts = [
    (T_filtered_filenames, T_fname, T_labels, T_emotion_labels),  # TESS
    (C_filtered_filenames, C_fname, C_labels, C_emotion_labels),  # CREMA-D
    (R_filtered_filenames, R_fname, R_labels, R_emotion_labels),  # RAVDESS
]

# The JL_* lists are not created by any cell in this notebook, so referring to
# them unconditionally raises NameError on a fresh kernel. They are included
# (in their original position) only when some other cell has defined them.
try:
    corpus_parts.append((JL_filtered_filenames, JL_fname, JL_labels, JL_emotion_labels))
except NameError:
    pass

corpus_parts.append((wav_file_directories, wav_file_names, emotion_labels, all_emos))  # IEMOCAP

# Every corpus must contribute index-aligned lists, otherwise the DataFrame
# columns would be shifted against each other.
for part_paths, part_names, part_labels, part_emotions in corpus_parts:
    assert len(part_paths) == len(part_names) == len(part_labels) == len(part_emotions), \
        'dataset lists are not index-aligned'

FullPaths = [path for part in corpus_parts for path in part[0]]
FFNames = [name for part in corpus_parts for name in part[1]]
FLabels = [label for part in corpus_parts for label in part[2]]
FFEmo = [emotion for part in corpus_parts for emotion in part[3]]


df = pd.DataFrame({
    'fname': FFNames,
    'filename': FullPaths,
    'label': FLabels,
    'emotion_label': FFEmo
})


df.head(10)

In [None]:
# Number of examples per emotion name in the combined DataFrame
df['emotion_label'].value_counts()

In [None]:

# value_counts() orders its result by frequency, while the tick labels below
# are in label-id order. Re-index the counts by emotion name in label-id order
# so that every bar sits under its own label (classes with no example get 0).
label_ids = sorted(EMOTIONS)
emotion_order = [EMOTIONS[label_id] for label_id in label_ids]
emotion_counts = df['emotion_label'].value_counts().reindex(emotion_order, fill_value=0)

fig, ax = plt.subplots()
ax.bar(x=range(len(emotion_order)), height=emotion_counts.to_numpy())
ax.set_xticks(ticks=range(len(emotion_order)))
ax.set_xticklabels(emotion_order, fontsize=10, rotation=45, ha='right') # Rotating labels by 45 degrees
ax.set_xlabel('Emotions')
ax.set_ylabel('Number of examples')

In [None]:
from sklearn.model_selection import train_test_split

# Fraction of rows held out for testing (0.2 -> 20% test, 80% train)
test_size = 0.2

# Split the combined DataFrame; random_state is fixed so the split is repeatable
train_df, test_df = train_test_split(df, random_state=42, test_size=test_size)

# Report the (rows, columns) shape of each split
print(f"Train DataFrame shape: {train_df.shape}")
print(f"Test DataFrame shape: {test_df.shape}")

**DATA PRE-PROCESSING**

In this section we remove noise from the signals and convert them to log-scaled mel spectrograms (the code below computes mel spectrograms with librosa and converts them to decibels; it does not compute MFCCs).

In [None]:
# Preprocessing parameters
# NOTE(review): none of the functions defined in this notebook read these
# globals - read_as_melspectrogram() calls read_audio() and
# audio_to_melspectrogram() with their own default arguments. Confirm whether
# these values are meant to be passed in.
sr = 44100 # Sampling rate
duration = 5 # presumably the clip length in seconds (see `samples` below) - confirm
hop_length = 347 # to make time steps 128
fmin = 20 # lower frequency bound
fmax = sr // 2 # upper frequency bound: half the sampling rate
n_mels = 128 # number of mel bands
n_fft = n_mels * 20 # FFT window size, 2560 with the values above
samples = sr * duration # number of audio samples per clip, 220500 with the values above

In [None]:
import librosa
from tqdm.notebook import tqdm_notebook

In [None]:
from scipy.io import wavfile
import noisereduce as nr
# load data
# (raw string: the Windows backslashes must not be parsed as escape sequences)
rate, data = wavfile.read(r"D:\Documents\MASC\Emo_rec_001\TESS Toronto emotional speech set data\OAF_angry\OAF_back_angry.wav")
# perform noise reduction
reduced_noise = nr.reduce_noise(y=data, sr=rate)
# write the denoised signal next to the notebook so it can be listened to
wavfile.write("mywav_reduced_noise.wav", rate, reduced_noise)

In [None]:
# import librosa

def read_audio(path, sr=44100, samples=44100):
    '''
    Load one audio file, denoise it, trim it and force it to a fixed length.

    path    : audio file to load
    sr      : sampling rate passed to librosa when loading
    samples : number of values in the returned waveform

    Returns (waveform, sampling rate); the waveform always holds exactly
    `samples` values.
    '''
    signal, sr = librosa.core.load(path, sr=sr)
    signal = nr.reduce_noise(y=signal, sr=sr)

    # trim silence (skipped for an empty signal: 0 length causes an error)
    if len(signal) > 0:
        signal, _ = librosa.effects.trim(signal)

    if len(signal) > samples:
        # long enough: keep only the leading `samples` values
        return signal[:samples], sr

    # too short: centre the signal, padding both sides with a constant
    missing = samples - len(signal)
    left = missing // 2
    right = missing - left
    return np.pad(signal, (left, right), 'constant'), sr

def audio_to_melspectrogram(audio, sr, n_mels=128, hop_length=512, n_fft=2048, fmin=0, fmax=None):
    '''
    Turn a waveform into a decibel-scaled mel spectrogram.

    The keyword arguments are forwarded unchanged to
    librosa.feature.melspectrogram; the result is converted with
    librosa.power_to_db and returned as a float32 array.
    '''
    mel_options = dict(sr=sr,
                       n_mels=n_mels,
                       hop_length=hop_length,
                       n_fft=n_fft,
                       fmin=fmin,
                       fmax=fmax)
    mel_power = librosa.feature.melspectrogram(y=audio, **mel_options)
    mel_db = librosa.power_to_db(mel_power)
    return mel_db.astype(np.float32)

def read_as_melspectrogram(path, audio_kwargs=None, mel_kwargs=None):
    '''
    Convert one audio file into a mel spectrogram
    so we can use machine learning.

    path         : audio file to convert
    audio_kwargs : optional dict of keyword arguments forwarded to
                   read_audio (e.g. {'sr': 44100, 'samples': 220500})
    mel_kwargs   : optional dict of keyword arguments forwarded to
                   audio_to_melspectrogram (e.g. {'hop_length': 347});
                   must not contain 'sr', which is taken from read_audio

    With both left as None the call behaves exactly as before: each helper
    runs with its own default arguments.
    '''
    afile, sr = read_audio(path, **(audio_kwargs or {}))
    mels = audio_to_melspectrogram(afile, sr=sr, **(mel_kwargs or {}))
    return mels

def _melspectrograms_for(paths):
    '''Return the transposed mel spectrogram of every path, in order.'''
    return [read_as_melspectrogram(file_path).transpose() for file_path in paths]


def convert_wav_to_image(df):
    '''
    Convert every file listed in df['filename'] to a transposed mel
    spectrogram, showing a notebook progress bar.

    Returns a list with one array per row, in row order. The per-file
    print of the path was removed: the progress bar (which now knows the
    total) reports progress without flooding the cell output.
    '''
    paths = df['filename']
    return _melspectrograms_for(tqdm_notebook(paths, total=len(paths)))


def convert_wav_to_image2(df):
    '''
    Same conversion as convert_wav_to_image, without a progress bar.

    Returns a list with one transposed mel spectrogram per row of df.
    '''
    return _melspectrograms_for(df['filename'])


def normalize(img):
    '''
    Standardizes an array: subtracts its mean and divides by its
    standard deviation. When the standard deviation is exactly zero
    a small constant (0.001) is used as the divisor instead.
    '''
    mean = np.mean(img)
    spread = np.std(img)
    divisor = spread if spread != 0 else 0.001
    return (img - mean) / divisor

def normalize_dataset(X):
    '''
    Applies normalize() to every array in X, one array at a time,
    and returns the results as a new list in the same order.
    '''
    return [normalize(img) for img in X]

In [None]:
save_directory = r'D:\Documents\MASC\Emo_rec_001\SAVED_SETS\LSTM_RAVDESS_CREMAD'
# Create the output directory before the long preprocessing run, so the
# np.save calls below cannot fail because the folder is missing.
os.makedirs(save_directory, exist_ok=True)

file_paths = train_df['filename'].tolist()

# Training split: spectrograms (each normalized on its own) and label ids
X = np.array(convert_wav_to_image(train_df))
X = normalize_dataset(X)
Y = train_df['label'].values

np.save(os.path.join(save_directory, 'X_Tr008.npy'), X)
np.save(os.path.join(save_directory, 'Y_Tr008.npy'), Y)

# Test split: same preprocessing as the training split
X_test = np.array(convert_wav_to_image(test_df))
X_test = normalize_dataset(X_test)
Y_test = test_df['label'].values


np.save(os.path.join(save_directory, 'X_Te008.npy'), X_test)
np.save(os.path.join(save_directory, 'Y_Te008.npy'), Y_test)

