In [26]:
# ------------------ Config + Imports ------------------

import os

import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import (
    BatchNormalization,
    Conv2D,
    Dense,
    Dropout,
    GlobalAveragePooling2D,
    Input,
    MaxPooling2D,
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm

# Spectrogram size handed to extract_mel_spectrogram as img_size:
# (number of mel bands, number of time frames).
IMG_SIZE = (128, 128)
# Root folder scanned by load_dataset; each sub-directory name becomes a label.
DATASET_PATH = "./data/Emotions"
# Not referenced in this cell; presumably the training batch size (confirm in the fit cell).
BATCH_SIZE = 32
# Not referenced in this cell; presumably the maximum number of epochs (confirm in the fit cell).
EPOCHS = 50

# ------------------ Feature Extraction ------------------

def extract_mel_spectrogram(file_path, img_size=(128, 128), augment=False):
    """Load an audio file and return a fixed-size log-mel spectrogram.

    The audio is loaded at 22050 Hz, turned into a mel power spectrogram with
    ``img_size[0]`` mel bands, converted to dB relative to its own peak (the
    loudest bin is 0 dB, every other bin is negative) and finally cropped or
    padded along the time axis to exactly ``img_size[1]`` frames.

    Args:
        file_path: Path of the audio file to load.
        img_size: ``(n_mels, n_frames)`` of the returned array.
        augment: When True, two random perturbations are applied, each
            independently with probability 0.5: additive white noise
            (standard deviation 0.005) and a pitch shift of -1 or +1 semitone.

    Returns:
        A 2-D array of shape ``img_size`` containing dB values.
    """
    y, sr = librosa.load(file_path, sr=22050)

    if augment:
        if np.random.rand() < 0.5:
            # White noise. Cast to the signal's dtype so the sum is not
            # silently upcast to float64 (randn returns float64).
            noise = 0.005 * np.random.randn(len(y))
            y = y + noise.astype(y.dtype)
        if np.random.rand() < 0.5:
            y = librosa.effects.pitch_shift(y, sr=sr, n_steps=np.random.choice([-1, 1]))

    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=img_size[0])
    S_DB = librosa.power_to_db(S, ref=np.max)

    # fix_length pads with zeros by default, but 0 dB is the *peak* of this
    # spectrogram, so zero-padding would fill short clips with maximum-loudness
    # frames. Pad with the quietest value present instead.
    pad_value = S_DB.min() if S_DB.size else 0.0
    S_DB = librosa.util.fix_length(
        S_DB, size=img_size[1], axis=1, constant_values=pad_value
    )
    return S_DB

# ------------------ Dataset Loader ------------------

def load_dataset(dataset_path=None, img_size=None, augment=True):
    """Build the spectrogram dataset from a folder-per-label directory tree.

    Every sub-directory of the dataset root is treated as one class; each
    ``.wav`` file inside it (extension matched case-insensitively) is converted
    with ``extract_mel_spectrogram``. Files that fail to convert are reported
    on stdout and skipped.

    Args:
        dataset_path: Root directory to scan. Defaults to ``DATASET_PATH``.
        img_size: Spectrogram size passed to ``extract_mel_spectrogram``.
            Defaults to ``IMG_SIZE``.
        augment: Forwarded to ``extract_mel_spectrogram``. Note that the
            perturbation is applied here, before any train/validation/test
            split performed on the returned arrays, so held-out samples are
            perturbed too; pass ``augment=False`` to load clean data.

    Returns:
        A tuple ``(X, y_cat, le)``: ``X`` is the stacked spectrograms with a
        trailing channel axis, ``y_cat`` the one-hot encoded labels and ``le``
        the fitted ``LabelEncoder`` mapping class indices back to folder names.

    Raises:
        ValueError: If no ``.wav`` file could be loaded.
    """
    root = DATASET_PATH if dataset_path is None else dataset_path
    size = IMG_SIZE if img_size is None else img_size

    X, y = [], []

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore any seeded split downstream) reproducible.
    for emotion_folder in sorted(os.listdir(root)):
        folder_path = os.path.join(root, emotion_folder)
        if not os.path.isdir(folder_path):
            continue

        for file in tqdm(sorted(os.listdir(folder_path)), desc=emotion_folder):
            if not file.lower().endswith(".wav"):
                continue
            file_path = os.path.join(folder_path, file)
            try:
                spec = extract_mel_spectrogram(file_path, size, augment=augment)
            except Exception as e:
                # Deliberately best-effort: one unreadable file must not
                # abort a multi-minute load.
                print(f"Error with file {file}: {e}")
                continue
            X.append(spec)
            y.append(emotion_folder)

    if not X:
        # Fail here with a clear message instead of deep inside the encoders.
        raise ValueError(f"No usable .wav files found under {root!r}")

    X = np.array(X)[..., np.newaxis]  # (samples, H, W, 1)
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)
    y_cat = to_categorical(y_encoded)
    return X, y_cat, le

# ------------------ CNN Model ------------------

def build_cnn_model(input_shape, num_classes, learning_rate=3e-4):
    """Build and compile the spectrogram classification CNN.

    Architecture: three identical stages (3x3 same-padded ReLU convolution,
    batch normalisation, 2x2 max pooling, dropout 0.3) with 64, 128 and 256
    filters, followed by global average pooling, a 256-unit ReLU dense layer,
    dropout 0.4 and a softmax output.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        num_classes: Number of units in the softmax output layer.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        accuracy metric).
    """
    # An explicit Input layer replaces the deprecated `input_shape=` argument
    # on the first Conv2D, which makes Keras 3 emit a UserWarning.
    layers = [Input(shape=input_shape)]

    for filters in (64, 128, 256):
        layers += [
            Conv2D(filters, (3, 3), activation='relu', padding='same'),
            BatchNormalization(),
            MaxPooling2D((2, 2)),
            Dropout(0.3),
        ]

    layers += [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu'),
        Dropout(0.4),
        Dense(num_classes, activation='softmax'),
    ]

    model = Sequential(layers)
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model




Angry: 100%|██████████| 2167/2167 [00:54<00:00, 39.95it/s]
Disgusted: 100%|██████████| 1863/1863 [01:32<00:00, 20.21it/s]
Fearful: 100%|██████████| 2047/2047 [01:38<00:00, 20.84it/s]
Happy: 100%|██████████| 2167/2167 [02:06<00:00, 17.19it/s]
Neutral: 100%|██████████| 1795/1795 [01:57<00:00, 15.22it/s]
Sad: 100%|██████████| 2167/2167 [02:51<00:00, 12.62it/s]
Suprised: 100%|██████████| 592/592 [00:41<00:00, 14.17it/s]
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
320/320 ━━━━━━━━━━━━━━━━━━━━ 148s 454ms/step - accuracy: 0.2314 - loss: 1.8772 - val_accuracy: 0.2816 - val_loss: 1.7990 - learning_rate: 3.0000e-04
Epoch 2/50
320/320 ━━━━━━━━━━━━━━━━━━━━ 148s 462ms/step - accuracy: 0.3704 - loss: 1.5330 - val_accuracy: 0.2922 - val_loss: 1.8122 - learning_rate: 3.0000e-04
Epoch 3/50
320/320 ━━━━━━━━━━━━━━━━━━━━ 148s 463ms/step - accuracy: 0.4533 - loss: 1.3647 - val_accuracy: 0.4316 - val_loss: 1.4204 - learning_rate: 3.0000e-04
Epoch 4/50
320/320 ━━━━━━━━━━━━━━━━━━━━ 100s 311ms/step - accuracy: 0.4997 - loss: 1.2728 - val_accuracy: 0.5137 - val_loss: 1.2533 - learning_rate: 3.0000e-04
Epoch 5/50
320/320 ━━━━━━━━━━━━━━━━━━━━ 100s 312ms/step - accuracy: 0.5209 - loss: 1.2160 - val_accuracy: 0.3887 - val_loss: 1.8365 - learning_rate: 3.0000e-04
Epoch 6/50
[1m320/320[0m [3



Test Accuracy: 0.65


AttributeError: module 'h5py' has no attribute 'File'