In [1]:
import warnings
warnings.filterwarnings("ignore")
import os
import numpy as np
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout, Conv1D, MaxPooling1D, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from joblib import Parallel, delayed, dump, load
import tensorflow as tf

# Dataset locations. Every dataset path keeps its trailing slash because the
# loader functions below build file paths by plain string concatenation.
Root_dir = 'C:/Users/User/Desktop/new/Dataset'
Crema_path = f"{Root_dir}/Crema/"
Ravdess_path = f"{Root_dir}/Ravdess/"
Savee_path = f"{Root_dir}/Savee/"
Tess_path = f"{Root_dir}/Tess/"

# Keep at most this many directory entries per dataset for faster training.
# NOTE(review): os.listdir returns entries in arbitrary order, so which
# entries survive the cut is platform dependent. For Ravdess and Tess the
# entries are sub-folders (their loaders list each one again), so the cap
# applies to folders here, not to audio files.
_MAX_ENTRIES = 500
Crema_dir_list = os.listdir(Crema_path)[:_MAX_ENTRIES]
Ravdess_dir_list = os.listdir(Ravdess_path)[:_MAX_ENTRIES]
Savee_dir_list = os.listdir(Savee_path)[:_MAX_ENTRIES]
Tess_dir_list = os.listdir(Tess_path)[:_MAX_ENTRIES]

# Load and preprocess datasets
def load_crema_data(Crema_path, Crema_dir_list):
    """Build a table of emotion labels and file paths for Crema files.

    The emotion code is the third underscore-separated field of each file
    name. Codes outside the six known ones are labelled 'Unknown'.

    Args:
        Crema_path: Prefix prepended verbatim to every file name.
        Crema_dir_list: Iterable of file names to process.

    Returns:
        DataFrame with columns 'Emotions' and 'Path', one row per file name.

    Raises:
        IndexError: If a file name has fewer than three '_'-separated fields.
    """
    code_to_emotion = {
        'SAD': 'sad',
        'ANG': 'angry',
        'DIS': 'disgust',
        'FEA': 'fear',
        'HAP': 'happy',
        'NEU': 'neutral',
    }

    labels = []
    file_paths = []
    for file_name in Crema_dir_list:
        file_paths.append(Crema_path + file_name)
        emotion_code = file_name.split('_')[2]
        labels.append(code_to_emotion.get(emotion_code, 'Unknown'))

    label_column = pd.DataFrame(labels, columns=['Emotions'])
    path_column = pd.DataFrame(file_paths, columns=['Path'])
    return pd.concat([label_column, path_column], axis=1)

def load_ravdess_data(Ravdess_path, Ravdess_dir_list):
    """Build a table of emotion labels and file paths for Ravdess files.

    Each entry of ``Ravdess_dir_list`` is a sub-folder of ``Ravdess_path``;
    at most the first 500 entries returned by ``os.listdir`` are read from
    each one. The emotion is the third '-'-separated field of the file name
    (extension stripped), interpreted as an integer code.

    Args:
        Ravdess_path: Prefix prepended verbatim to every sub-folder name
            (expected to end with a path separator).
        Ravdess_dir_list: Iterable of sub-folder names.

    Returns:
        DataFrame with columns 'Emotions' and 'Path', one row per file.
        Codes 1-8 are translated to emotion names; any other code is kept
        as the integer itself.

    Raises:
        IndexError: If a file name has fewer than three '-'-separated fields.
        ValueError: If the third field is not an integer.
    """
    code_to_emotion = {
        1: 'neutral',
        2: 'calm',
        3: 'happy',
        4: 'sad',
        5: 'angry',
        6: 'fear',
        7: 'disgust',
        8: 'surprise',
    }

    emotions_ravdess = []
    path_ravdess = []
    for actor_dir in Ravdess_dir_list:
        for file_name in os.listdir(Ravdess_path + actor_dir)[:500]:
            fields = file_name.split('.')[0].split('-')
            code = int(fields[2])
            # Translate the code here instead of calling
            # `df.Emotions.replace(..., inplace=True)` afterwards: that chained
            # in-place update does not modify the DataFrame when pandas
            # Copy-on-Write is active, which would leave raw integers as labels.
            emotions_ravdess.append(code_to_emotion.get(code, code))
            path_ravdess.append(Ravdess_path + actor_dir + '/' + file_name)

    return pd.DataFrame({'Emotions': emotions_ravdess, 'Path': path_ravdess})

def load_savee_data(Savee_path, Savee_dir_list):
    """Build a table of emotion labels and file paths for Savee files.

    The emotion prefix is taken from the second '_'-separated field of the
    file name with its last six characters removed (assumes names shaped
    like ``DC_sa01.wav`` -- confirm against the dataset).

    Labels use the same vocabulary as the other loaders in this file
    ('happy' and 'sad' rather than 'happiness' and 'sadness'), so that the
    merged dataset does not end up with two classes for the same emotion.

    Args:
        Savee_path: Prefix prepended verbatim to every file name.
        Savee_dir_list: Iterable of file names to process.

    Returns:
        DataFrame with columns 'Emotions' and 'Path', one row per file name.
        Unrecognised prefixes are labelled 'Unknown'.

    Raises:
        IndexError: If a file name contains no '_'.
    """
    prefix_to_emotion = {
        'a': 'angry',
        'd': 'disgust',
        'f': 'fear',
        'h': 'happy',
        'n': 'neutral',
        'sa': 'sad',
        'su': 'surprise',
    }

    emotions_savee = []
    path_savee = []
    for file_name in Savee_dir_list:
        path_savee.append(Savee_path + file_name)
        # Drop the trailing six characters of the second field to isolate
        # the emotion prefix.
        prefix = file_name.split('_')[1][:-6]
        emotions_savee.append(prefix_to_emotion.get(prefix, 'Unknown'))

    emotion_savee_df = pd.DataFrame(emotions_savee, columns=['Emotions'])
    path_savee_df = pd.DataFrame(path_savee, columns=['Path'])
    return pd.concat([emotion_savee_df, path_savee_df], axis=1)

def load_tess_data(Tess_path, Tess_dir_list):
    """Build a table of emotion labels and file paths for Tess files.

    Each entry of ``Tess_dir_list`` is a sub-folder of ``Tess_path``; at most
    the first 500 entries returned by ``os.listdir`` are read from each one.
    The label is the third '_'-separated field of the file name (extension
    stripped); the code 'ps' is reported as 'surprise', every other value is
    used as-is.

    Args:
        Tess_path: Dataset directory, with or without a trailing separator.
        Tess_dir_list: Iterable of sub-folder names.

    Returns:
        DataFrame with columns 'Emotions' and 'Path', one row per file.

    Raises:
        IndexError: If a file name has fewer than three '_'-separated fields.
    """
    emotions_tess = []
    path_tess = []
    for folder_name in Tess_dir_list:
        # Use one folder path for both listing and the returned file paths.
        # Previously the listing inserted a '/' after Tess_path while the file
        # paths did not, so a Tess_path without a trailing slash listed fine
        # but produced paths that did not exist.
        folder = os.path.join(Tess_path, folder_name)
        for file_name in os.listdir(folder)[:500]:
            label = file_name.split('.')[0].split('_')[2]
            emotions_tess.append('surprise' if label == 'ps' else label)
            path_tess.append(folder + '/' + file_name)

    emotion_tess_df = pd.DataFrame(emotions_tess, columns=['Emotions'])
    path_tess_df = pd.DataFrame(path_tess, columns=['Path'])
    return pd.concat([emotion_tess_df, path_tess_df], axis=1)

# Build one labelled table per dataset.
Crema_df = load_crema_data(Crema_path, Crema_dir_list)
Ravdess_df = load_ravdess_data(Ravdess_path, Ravdess_dir_list)
Savee_df = load_savee_data(Savee_path, Savee_dir_list)
Tess_df = load_tess_data(Tess_path, Tess_dir_list)

# Merging all datasets: paths and labels are accumulated in two parallel
# module-level lists.
data_path = []
data_emotion = []


def append_data(dataset):
    """Append every row's Path and Emotions value to the module-level lists."""
    data_path.extend(dataset.Path)
    data_emotion.extend(dataset.Emotions)


for dataset_df in (Crema_df, Ravdess_df, Savee_df, Tess_df):
    append_data(dataset_df)

All_data = pd.DataFrame({'Emotions': data_emotion, 'Path': data_path})

# Encode labels: each distinct emotion string becomes an integer class id.
label_encoder = LabelEncoder()
All_data['Emotion_Label'] = label_encoder.fit_transform(All_data['Emotions'])

# Split data: file paths are the inputs, encoded emotions the targets;
# 20% is held out for testing with a fixed seed.
X = All_data['Path'].values
y = All_data['Emotion_Label'].values
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=61,
)

# Feature Extraction Functions
def extract_features(file_path):
    """Return a single 1-D feature vector describing one audio file.

    The file is loaded with librosa at a sampling rate of 44100. Five
    librosa features are computed, each is reduced with a mean over axis 0
    of its transpose (presumably the frame axis in librosa's layout --
    confirm), and the results are concatenated in this order: zero-crossing
    rate, chroma STFT, 40 MFCCs, RMS, mel spectrogram.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        1-D numpy array holding the concatenated averaged features.
    """
    signal, sr = librosa.load(file_path, sr=44100)

    def averaged(feature_matrix):
        # Same reduction as the one applied to every feature.
        return np.mean(feature_matrix.T, axis=0)

    zcr = averaged(librosa.feature.zero_crossing_rate(y=signal))
    chroma_stft = averaged(librosa.feature.chroma_stft(y=signal, sr=sr))
    mfcc = averaged(librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=40))
    rms = averaged(librosa.feature.rms(y=signal))
    mel = averaged(librosa.feature.melspectrogram(y=signal, sr=sr))

    feature_parts = [zcr, chroma_stft, mfcc, rms, mel]
    return np.hstack(feature_parts)

def parallel_feature_extraction(file_paths):
    """Run extract_features on every path through joblib with n_jobs=-1.

    Args:
        file_paths: Iterable of audio file paths.

    Returns:
        The results collected by joblib.Parallel, one per input path.
    """
    worker_pool = Parallel(n_jobs=-1)
    pending_tasks = (delayed(extract_features)(path) for path in file_paths)
    return worker_pool(pending_tasks)

# Extract features, caching them on disk so later runs can skip extraction.
_train_cache = 'X_train_features.npy'
_test_cache = 'X_test_features.npy'

X_train_features = None
X_test_features = None
if os.path.exists(_train_cache) and os.path.exists(_test_cache):
    X_train_features = np.load(_train_cache)
    X_test_features = np.load(_test_cache)
    # The cache stores features only, not the file list they came from. If it
    # was written for a different split, the rows would be paired with the
    # wrong labels, so discard it when the row counts disagree.
    # NOTE(review): this is a sanity check only; it cannot detect a cache of
    # the same size built from different files. Delete the .npy files
    # whenever the dataset or the split changes.
    if len(X_train_features) != len(X_train) or len(X_test_features) != len(X_test):
        X_train_features = None
        X_test_features = None

if X_train_features is None:
    X_train_features = np.array(parallel_feature_extraction(X_train))
    X_test_features = np.array(parallel_feature_extraction(X_test))
    np.save(_train_cache, X_train_features)
    np.save(_test_cache, X_test_features)

# Normalize features: the scaler is fitted on the training features only and
# then applied unchanged to both sets.
scaler = StandardScaler()
scaler.fit(X_train_features)
X_train_features = scaler.transform(X_train_features)
X_test_features = scaler.transform(X_test_features)

# Reshape features for GRU input: append a trailing axis of length 1 so each
# 2-D (rows, columns) matrix becomes (rows, columns, 1).
X_train_features = X_train_features[:, :, np.newaxis]
X_test_features = X_test_features[:, :, np.newaxis]

# Define Model: one Conv1D stage (with batch norm and pooling) feeding two
# L2-regularised GRU layers and a softmax output with one unit per class.
_input_shape = (X_train_features.shape[1], 1)
_num_classes = len(label_encoder.classes_)

_model_layers = [
    Conv1D(64, kernel_size=3, activation='relu', input_shape=_input_shape),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    GRU(
        128,
        return_sequences=True,  # the next GRU consumes the full sequence
        kernel_regularizer=tf.keras.regularizers.l2(0.01),
    ),
    Dropout(0.3),
    GRU(64, kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    Dense(_num_classes, activation='softmax'),
]
model = Sequential(_model_layers)

# Compile Model: the sparse loss matches the integer class ids in y_train.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Define Callbacks: both watch the validation loss. Training stops after 5
# epochs without improvement (restoring the best weights); the learning rate
# is halved after 3 such epochs, down to a floor of 0.00001.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    patience=3,
    factor=0.5,
    min_lr=0.00001,
)

# Train Model: 20% of the training data is passed as validation_split.
_fit_options = {
    'epochs': 50,
    'batch_size': 32,
    'validation_split': 0.2,
    'callbacks': [early_stopping, reduce_lr],
}
history = model.fit(X_train_features, y_train, **_fit_options)

# Evaluate Model on the held-out test split.
loss, accuracy = model.evaluate(X_test_features, y_test)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")

# Classification Report: the predicted class is the index of the highest
# softmax output.
y_pred = model.predict(X_test_features)
y_pred_labels = np.argmax(y_pred, axis=1)

# Pass the full list of class ids explicitly. Without `labels`, the report
# derives the classes from y_test and the predictions, and raises ValueError
# when that count differs from len(target_names), which happens whenever a
# rare class is missing from the test split and is never predicted.
_class_ids = np.arange(len(label_encoder.classes_))
print(
    classification_report(
        y_test,
        y_pred_labels,
        labels=_class_ids,
        target_names=label_encoder.classes_,
    )
)


Epoch 1/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 175ms/step - accuracy: 0.1487 - loss: 3.8888 - val_accuracy: 0.1423 - val_loss: 2.6881 - learning_rate: 0.0010
Epoch 2/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 161ms/step - accuracy: 0.2594 - loss: 2.2572 - val_accuracy: 0.2333 - val_loss: 2.2319 - learning_rate: 0.0010
Epoch 3/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 195ms/step - accuracy: 0.3011 - loss: 1.9741 - val_accuracy: 0.2751 - val_loss: 2.0330 - learning_rate: 0.0010
Epoch 4/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 207ms/step - accuracy: 0.3746 - loss: 1.7999 - val_accuracy: 0.3481 - val_loss: 1.9015 - learning_rate: 0.0010
Epoch 5/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 209ms/step - accuracy: 0.4057 - loss: 1.6895 - val_accuracy: 0.3756 - val_loss: 1.7666 - learning_rate: 0.0010
Epoch 6/50
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0