In [1]:
import os
import random
import numpy as np
from glob import glob
from tqdm import tqdm

import librosa
import librosa.effects as le
from tensorflow.image import resize
import tensorflow as tf

from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import joblib
from sklearn.utils.class_weight import compute_class_weight

# Set audio directory path
base_dir = './Emotions'
emotion_folders = {
    'Angr': 0,
    'Fearful': 1,
    'Happy': 2,
    'Sad': 3
}

target_shape = (128, 128)

def augment_audio(y, sr):
    if random.random() < 0.5:
        y = librosa.effects.pitch_shift(y=y, sr=sr, n_steps=random.choice([-2, -1, 1, 2]))
    if random.random() < 0.5:
        y += 0.005 * np.random.randn(len(y))
    return y
# Load and process all audio files
X = []
y = []

print("Extracting features...")
for emotion_name, label in emotion_folders.items():
    files = sorted(glob(os.path.join(base_dir, emotion_name, '*.wav')))
    with tqdm(total=len(files), desc=emotion_name) as pbar:
        for file_path in files:
            try:
                audio, sr = librosa.load(file_path, sr=None)
                audio = augment_audio(audio, sr)
                audio = le.time_stretch(audio, rate=1.0)
                mel = librosa.feature.melspectrogram(y=audio, sr=sr)
                mel = resize(np.expand_dims(mel, axis=-1), target_shape)
                X.append(mel)
                y.append(label)
            except Exception as e:
                print(f"Error with {file_path}: {e}")
            pbar.update(1)

# Shuffle
combined = list(zip(X, y))
random.shuffle(combined)
X, y = zip(*combined)


# Convert to arrays
y = to_categorical(y, num_classes=4)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
X_train = tf.stack(X_train)
X_test = tf.stack(X_test)
y_train = tf.stack(y_train)
y_test = tf.stack(y_test)

# Class weights to combat imbalance
y_train_int = np.argmax(y_train.numpy(), axis=1)
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train_int), y=y_train_int)
class_weights_dict = dict(enumerate(class_weights))


# Build CNN model
input_shape = X_train.shape[1:]
input_layer = Input(shape=input_shape)
x = Conv2D(16, (3, 3), activation='relu')(input_layer)
x = MaxPooling2D((2, 2))(x)
x = Flatten()(x)
x = Dense(32, activation='relu')(x)
output_layer = Dense(4, activation='softmax')(x)

model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])




2025-05-05 22:48:44.515327: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-05 22:48:44.523384: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-05 22:48:44.610802: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-05 22:48:44.690591: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746503324.779087    5860 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746503324.80

Extracting features...


Angr:   0%|          | 0/2167 [00:00<?, ?it/s]2025-05-05 22:48:48.183804: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
Angr: 100%|██████████| 2167/2167 [02:56<00:00, 12.28it/s]
Fearful: 100%|██████████| 2047/2047 [02:18<00:00, 14.75it/s]
Happy: 100%|██████████| 2167/2167 [02:20<00:00, 15.40it/s]
Sad: 100%|██████████| 2167/2167 [03:14<00:00, 11.14it/s]


In [3]:
import numpy as np
from sklearn.metrics import precision_recall_fscore_support


# Train the model
print("Training model...")
model.fit(X_train, y_train, epochs=40, batch_size=32, validation_data=(X_test, y_test))

# Evaluate
test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"\nTest Accuracy: {test_accuracy[1]:.4f}")

y_pred_probs = model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)
y_true = np.argmax(y_test.numpy(), axis=1)

precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='micro')

print(f"Micro-Precision: {precision:.3f}")
print(f"Micro-Recall:    {recall:.3f}")
print(f"Micro-F1 Score:  {f1:.3f}")
print(classification_report(y_test, y_pred))
print(classification_report(y_true, y_pred, target_names=['Angry', 'Fearful', 'Happy', 'Sad']))


Training model...
Epoch 1/40
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 47ms/step - accuracy: 0.8985 - loss: 0.2714 - val_accuracy: 0.5887 - val_loss: 6.4823
Epoch 2/40
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 49ms/step - accuracy: 0.9001 - loss: 0.2624 - val_accuracy: 0.5863 - val_loss: 6.6596
Epoch 3/40
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 48ms/step - accuracy: 0.9062 - loss: 0.2458 - val_accuracy: 0.5938 - val_loss: 7.1900
Epoch 4/40
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 50ms/step - accuracy: 0.9011 - loss: 0.2531 - val_accuracy: 0.5873 - val_loss: 7.2220
Epoch 5/40
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 48ms/step - accuracy: 0.8982 - loss: 0.2429 - val_accuracy: 0.5714 - val_loss: 6.7206
Epoch 6/40
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 49ms/step - accuracy: 0.8892 - loss: 0.2810 - val_accuracy: 0.5615 - val_loss: 7.8701

ValueError: Classification metrics can't handle a mix of multilabel-indicator and multiclass targets

In [5]:

print(classification_report(y_true, y_pred, target_names=['Angry', 'Fearful', 'Happy', 'Sad']))

              precision    recall  f1-score   support

       Angry       0.64      0.65      0.65       563
     Fearful       0.52      0.37      0.44       502
       Happy       0.49      0.58      0.53       545
         Sad       0.66      0.70      0.68       527

    accuracy                           0.58      2137
   macro avg       0.58      0.58      0.57      2137
weighted avg       0.58      0.58      0.58      2137

