In [4]:
import warnings
warnings.filterwarnings("ignore")
import os
import numpy as np
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout, Conv1D, MaxPooling1D, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from joblib import Parallel, delayed, dump, load
import tensorflow as tf
import joblib

ModuleNotFoundError: No module named 'matplotlib'

In [10]:
# Set paths
Root_dir = 'C:/Users/Yubraj/Desktop/miky/miky/Dataset'
Crema_path = Root_dir + "/Crema/" 
Ravdess_path = Root_dir + "/Ravdess/"
Savee_path = Root_dir + "/Savee/"
Tess_path = Root_dir + "/Tess/"

In [11]:
# Limit the number of samples to 300 from each dataset for increased dataset size
Crema_dir_list = os.listdir(Crema_path)[:300]
Ravdess_dir_list = os.listdir(Ravdess_path)[:300]
Savee_dir_list = os.listdir(Savee_path)[:300]
Tess_dir_list = os.listdir(Tess_path)[:300]

In [12]:
# Load and preprocess datasets
# Crema dataset
emotions_crema = []
paths_crema = []
for it in Crema_dir_list:
    paths_crema.append(Crema_path + it)
    part = it.split('_')
    if part[2] == 'SAD':
        emotions_crema.append('sad')
    elif part[2] == 'ANG':
        emotions_crema.append('angry')
    elif part[2] == 'DIS':
        emotions_crema.append('disgust')
    elif part[2] == 'FEA':
        emotions_crema.append('fear')
    elif part[2] == 'HAP':
        emotions_crema.append('happy')
    elif part[2] == 'NEU':
        emotions_crema.append('neutral')
    else:
        emotions_crema.append('Unknown')

emotions_crema_df = pd.DataFrame(emotions_crema, columns=['Emotions'])
path_crema_df = pd.DataFrame(paths_crema, columns=['Path'])
Crema_df = pd.concat([emotions_crema_df, path_crema_df], axis=1)   

In [13]:
# Ravdess dataset
emotions_ravdess = []
path_ravdess = []
for it in Ravdess_dir_list:
    actor = os.listdir(Ravdess_path + it)[:300]
    for file in actor:
        part = file.split('.')[0]
        part = part.split('-')
        emotions_ravdess.append(int(part[2]))
        path_ravdess.append(Ravdess_path + it + '/' + file)

emotion_ravdess_df = pd.DataFrame(emotions_ravdess, columns=['Emotions'])
path_ravdess_df = pd.DataFrame(path_ravdess, columns=['Path'])
Ravdess_df = pd.concat([emotion_ravdess_df, path_ravdess_df], axis=1)
Ravdess_df.Emotions.replace({1:'neutral', 2:'calm', 3:'happy', 4:'sad', 5:'angry', 6:'fear', 7:'disgust', 8:'surprise'}, inplace=True)

In [14]:
# Savee dataset
emotions_savee = []
path_savee = []
for it in Savee_dir_list:
    path_savee.append(Savee_path + it)
    part = it.split('_')[1]
    part = part[:-6]
    if part == 'a':
        emotions_savee.append('angry')
    elif part == 'd':
        emotions_savee.append('disgust')
    elif part == 'f':
        emotions_savee.append('fear')
    elif part == 'h':
        emotions_savee.append('happiness')
    elif part == 'n':
        emotions_savee.append('neutral')
    elif part == 'sa':
        emotions_savee.append('sadness')
    elif part == 'su':
        emotions_savee.append('surprise')
    else:
        emotions_savee.append('Unknown')

emotion_savee_df = pd.DataFrame(emotions_savee, columns=['Emotions'])
path_savee_df = pd.DataFrame(path_savee, columns=['Path'])
Savee_df = pd.concat([emotion_savee_df, path_savee_df], axis=1)

In [15]:
# Tess dataset
emotions_tess = []
path_tess = []
for it in Tess_dir_list:
    directories = os.listdir(Tess_path + '/' + it)[:300]
    for file in directories:
        part = file.split('.')[0]
        part = part.split('_')[2]
        if part == 'ps':
            emotions_tess.append('surprise')
        else:
            emotions_tess.append(part)
        path_tess.append(Tess_path + it + '/' + file)

emotion_tess_df = pd.DataFrame(emotions_tess, columns=['Emotions'])
path_tess_df = pd.DataFrame(path_tess, columns=['Path'])
Tess_df = pd.concat([emotion_tess_df, path_tess_df], axis=1)


In [16]:
# Merging all datasets
data_path = []
data_emotion = []

def append_data(dataset):
    for path, emotion in zip(dataset.Path, dataset.Emotions):
        data_path.append(path)
        data_emotion.append(emotion)

append_data(Crema_df)
append_data(Ravdess_df)
append_data(Savee_df)
append_data(Tess_df)

All_data = pd.DataFrame(data_emotion, columns=['Emotions'])
All_data['Path'] = data_path

In [17]:
# Display samples of each dataset
print("CREMA Dataset Sample:")
print(Crema_df.head())

print("\nRAVDESS Dataset Sample:")
print(Ravdess_df.head())

print("\nSAVEE Dataset Sample:")
print(Savee_df.head())

print("\nTESS Dataset Sample:")
print(Tess_df.head())

# Display sample of the merged dataset
print("\nMerged Dataset Sample:")
print(All_data.head())


CREMA Dataset Sample:
  Emotions                                               Path
0    angry  C:/Users/User/Desktop/new/Dataset/Crema/1001_D...
1  disgust  C:/Users/User/Desktop/new/Dataset/Crema/1001_D...
2     fear  C:/Users/User/Desktop/new/Dataset/Crema/1001_D...
3    happy  C:/Users/User/Desktop/new/Dataset/Crema/1001_D...
4  neutral  C:/Users/User/Desktop/new/Dataset/Crema/1001_D...

RAVDESS Dataset Sample:
  Emotions                                               Path
0  neutral  C:/Users/User/Desktop/new/Dataset/Ravdess/Acto...
1  neutral  C:/Users/User/Desktop/new/Dataset/Ravdess/Acto...
2  neutral  C:/Users/User/Desktop/new/Dataset/Ravdess/Acto...
3  neutral  C:/Users/User/Desktop/new/Dataset/Ravdess/Acto...
4     calm  C:/Users/User/Desktop/new/Dataset/Ravdess/Acto...

SAVEE Dataset Sample:
  Emotions                                               Path
0    angry  C:/Users/User/Desktop/new/Dataset/Savee/DC_a01...
1    angry  C:/Users/User/Desktop/new/Dataset/Savee/DC_a02...


In [18]:
# Encode labels
label_encoder = LabelEncoder()
All_data['Emotion_Label'] = label_encoder.fit_transform(All_data['Emotions'])

In [19]:
# Split data
X = All_data['Path'].values
y = All_data['Emotion_Label'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=61)

In [20]:
# Feature Extraction Functions
def extract_features(file_path):
    signal, sr = librosa.load(file_path, sr=44100)
    zcr = np.mean(librosa.feature.zero_crossing_rate(y=signal).T, axis=0)
    chroma_stft = np.mean(librosa.feature.chroma_stft(y=signal, sr=sr).T, axis=0)
    mfcc = np.mean(librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=40).T, axis=0)
    rms = np.mean(librosa.feature.rms(y=signal).T, axis=0)
    mel = np.mean(librosa.feature.melspectrogram(y=signal, sr=sr).T, axis=0)
    return np.hstack([zcr, chroma_stft, mfcc, rms, mel])

def parallel_feature_extraction(file_paths):
    return Parallel(n_jobs=-1)(delayed(extract_features)(file) for file in file_paths)

In [21]:
def save_features(features, filename):
    joblib.dump(features, filename)


In [22]:
def load_features(filename):
    return joblib.load(filename)

In [23]:
# Extract and save features if not already saved
features_filename_train = 'X_train_features.joblib'
features_filename_test = 'X_test_features.joblib'

if not os.path.exists(features_filename_train) or not os.path.exists(features_filename_test):
    X_train_features = np.array(parallel_feature_extraction(X_train))
    X_test_features = np.array(parallel_feature_extraction(X_test))
    save_features(X_train_features, features_filename_train)
    save_features(X_test_features, features_filename_test)
else:
    X_train_features = load_features(features_filename_train)
    X_test_features = load_features(features_filename_test)

In [24]:
# Extract features
# X_train_features = np.array(parallel_feature_extraction(X_train))
# X_test_features = np.array(parallel_feature_extraction(X_test))

In [25]:
# Normalize features
scaler = StandardScaler()
X_train_features = scaler.fit_transform(X_train_features)
X_test_features = scaler.transform(X_test_features)

In [26]:
# Reshape features for GRU input
X_train_features = np.expand_dims(X_train_features, axis=2)
X_test_features = np.expand_dims(X_test_features, axis=2)

In [27]:
# Define Model
model = Sequential([
    Conv1D(128, kernel_size=3, activation='relu', input_shape=(X_train_features.shape[1], 1)),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    GRU(256, return_sequences=True, kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    Dropout(0.5),
    GRU(128, kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    Dense(len(label_encoder.classes_), activation='softmax')
])

optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [28]:
# Implement Early Stopping and Learning Rate Reduction
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)


In [29]:
# Train Model
history = model.fit(X_train_features, y_train, epochs=30, batch_size=32, validation_data=(X_test_features, y_test), callbacks=[early_stopping, reduce_lr])

Epoch 1/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 192ms/step - accuracy: 0.1774 - loss: 5.0883 - val_accuracy: 0.1952 - val_loss: 2.6683 - learning_rate: 0.0010
Epoch 2/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 172ms/step - accuracy: 0.2392 - loss: 2.2727 - val_accuracy: 0.1818 - val_loss: 2.2068 - learning_rate: 0.0010
Epoch 3/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 255ms/step - accuracy: 0.3140 - loss: 1.9008 - val_accuracy: 0.4163 - val_loss: 1.7147 - learning_rate: 0.0010
Epoch 4/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 261ms/step - accuracy: 0.4117 - loss: 1.6078 - val_accuracy: 0.4607 - val_loss: 1.5607 - learning_rate: 0.0010
Epoch 5/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 192ms/step - accuracy: 0.4886 - loss: 1.4550 - val_accuracy: 0.4762 - val_loss: 1.4588 - learning_rate: 0.0010
Epoch 6/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0

In [30]:

# Evaluate Model
loss, accuracy = model.evaluate(X_test_features, y_test)
print(f"Test Accuracy: {accuracy:.4f}")


[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 103ms/step - accuracy: 0.7171 - loss: 0.8082
Test Accuracy: 0.7273


In [31]:
# Evaluate Model
loss, accuracy = model.evaluate(X_test_features, y_test)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 104ms/step - accuracy: 0.7171 - loss: 0.8082
Test Loss: 0.7857
Test Accuracy: 0.7273


In [32]:
# Save model, encoder, and scaler
model.save('final_audio_emotion_model.h5')
dump(label_encoder, 'label_encoder.joblib')
dump(scaler, 'scaler.joblib')



['scaler.joblib']

In [33]:
from sklearn.metrics import classification_report

# Predict on test data
y_pred = model.predict(X_test_features)
y_pred_labels = np.argmax(y_pred, axis=1)

# Classification report
print(classification_report(y_test, y_pred_labels, target_names=label_encoder.classes_))



[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 150ms/step
              precision    recall  f1-score   support

       angry       0.84      0.70      0.77       138
        calm       0.36      0.61      0.45        41
     disgust       0.64      0.71      0.67       136
        fear       0.75      0.77      0.76       146
   happiness       0.29      0.40      0.33        10
       happy       0.85      0.65      0.74       135
     neutral       1.00      0.77      0.87       109
         sad       0.71      0.72      0.72       122
     sadness       0.33      1.00      0.50         3
    surprise       0.72      0.83      0.77       128

    accuracy                           0.73       968
   macro avg       0.65      0.72      0.66       968
weighted avg       0.76      0.73      0.74       968



In [34]:
def predict_emotion(file_path):
    # Extract features
    features = extract_features(file_path)
    
    # Normalize features
    features = scaler.transform([features])
    
    # Reshape features for GRU input
    features = np.expand_dims(features, axis=2)
    
    # Make prediction
    y_pred = model.predict(features)
    y_pred_label = np.argmax(y_pred, axis=1)
    
    # Decode label to emotion
    predicted_emotion = label_encoder.inverse_transform(y_pred_label)
    
    return predicted_emotion[0]

In [35]:

# Example usage
sample_path = "C:/Users/User/Desktop/new/Dataset/Crema/1001_MTI_HAP_XX.wav"
predicted_emotion = predict_emotion(sample_path)
print(f"Predicted Emotion: {predicted_emotion}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
Predicted Emotion: fear
