In [None]:
# ------------------ Config + Imports ------------------

import os
import numpy as np
import librosa
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

IMG_SIZE = (128, 128)
DATASET_PATH = "D:/Downloads/archive/Emotions"
BATCH_SIZE = 32
EPOCHS = 1

# ------------------ Feature Extraction ------------------

def extract_mel_spectrogram(file_path, img_size=(128, 128), augment=False):
    """Load an audio file and return a fixed-size log-mel spectrogram.

    Args:
        file_path: Path of the audio file to load (resampled to 22050 Hz).
        img_size: ``(n_mels, n_frames)`` of the returned array.
        augment: When true, independently apply (each with probability 0.5)
            additive white noise and a pitch shift of -1 or +1 semitone.

    Returns:
        A 2-D array of shape ``img_size`` holding dB values relative to the
        clip's peak power. Clips longer than ``n_frames`` are truncated;
        shorter clips are padded with the clip's own minimum dB value.
    """
    n_mels, n_frames = img_size
    y, sr = librosa.load(file_path, sr=22050)

    if augment:
        if np.random.rand() < 0.5:
            # White noise, cast back to the waveform's dtype so the output
            # dtype does not depend on which random branch was taken.
            noise = 0.005 * np.random.randn(len(y))
            y = y + noise.astype(y.dtype)
        if np.random.rand() < 0.5:
            y = librosa.effects.pitch_shift(
                y, sr=sr, n_steps=np.random.choice([-1, 1]))

    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    S_DB = librosa.power_to_db(S, ref=np.max)
    # With ref=np.max the loudest bin is 0 dB, so the default zero padding
    # would append "maximum loudness" frames to short clips. Pad with the
    # quietest value present instead so the padding reads as silence.
    S_DB = librosa.util.fix_length(
        S_DB, size=n_frames, axis=1, constant_values=S_DB.min())
    return S_DB

# ------------------ Dataset Loader ------------------

def load_dataset(dataset_path=None, img_size=None, augment=True):
    """Load every ``.wav`` file under the dataset folder as a spectrogram.

    The dataset folder is expected to contain one sub-folder per emotion;
    the sub-folder name is used as the label of every file inside it.

    Args:
        dataset_path: Root folder of the dataset. Defaults to ``DATASET_PATH``.
        img_size: Spectrogram size passed to ``extract_mel_spectrogram``.
            Defaults to ``IMG_SIZE``.
        augment: Forwarded to ``extract_mel_spectrogram``. NOTE: augmentation
            is applied to every loaded sample, so any hold-out split made
            from the returned arrays will contain augmented samples too;
            pass ``augment=False`` to load clean features.

    Returns:
        A tuple ``(X, y_cat, le)``: the stacked spectrograms with a trailing
        channel axis, the one-hot encoded labels, and the fitted
        ``LabelEncoder``.

    Raises:
        ValueError: If no audio file could be loaded.
    """
    if dataset_path is None:
        dataset_path = DATASET_PATH
    if img_size is None:
        img_size = IMG_SIZE

    X, y = [], []

    # Sort directory listings: os.listdir order is arbitrary, and the sample
    # order determines what a seeded train/test split produces.
    for emotion_folder in sorted(os.listdir(dataset_path)):
        folder_path = os.path.join(dataset_path, emotion_folder)
        if not os.path.isdir(folder_path):
            continue

        for file in tqdm(sorted(os.listdir(folder_path)), desc=emotion_folder):
            # Case-insensitive so files named "*.WAV" are not skipped.
            if not file.lower().endswith(".wav"):
                continue
            file_path = os.path.join(folder_path, file)
            try:
                spec = extract_mel_spectrogram(file_path, img_size, augment=augment)
            except Exception as e:
                # Best effort: report the unreadable file and keep going.
                print(f"Error with file {file}: {e}")
                continue
            X.append(spec)
            y.append(emotion_folder)

    if not X:
        raise ValueError(f"No .wav files could be loaded from {dataset_path!r}")

    X = np.array(X)[..., np.newaxis]  # (samples, H, W, 1)
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)
    y_cat = to_categorical(y_encoded)
    return X, y_cat, le

# ------------------ CNN Model ------------------

def build_cnn_model(input_shape, num_classes, learning_rate=3e-4):
    """Build and compile the spectrogram classification CNN.

    The network is three Conv2D -> BatchNormalization -> MaxPooling2D ->
    Dropout stages (64, 128 and 256 filters), followed by global average
    pooling, a 256-unit dense layer and a softmax output.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        num_classes: Number of units in the softmax output layer.
        learning_rate: Learning rate of the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        accuracy metric).
    """
    # Imported locally so this function does not depend on an edit to the
    # file's import block.
    from tensorflow.keras.layers import Input

    # An explicit Input layer replaces `input_shape=` on the first Conv2D,
    # which recent Keras versions warn about for Sequential models.
    layers = [Input(shape=input_shape)]
    for filters in (64, 128, 256):
        layers += [
            Conv2D(filters, (3, 3), activation='relu', padding='same'),
            BatchNormalization(),
            MaxPooling2D((2, 2)),
            Dropout(0.3),
        ]
    layers += [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu'),
        Dropout(0.4),
        Dense(num_classes, activation='softmax'),
    ]

    model = Sequential(layers)
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

 #------------------ Training Pipeline ------------------

# Load the features, the one-hot labels and the fitted label encoder
X, y, label_encoder = load_dataset()

# Train/test split: 20% held out, stratified by label, fixed seed
_split_kwargs = {"test_size": 0.2, "stratify": y, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **_split_kwargs)


Angry: 100%|██████████| 2153/2153 [04:57<00:00,  7.24it/s]
Disgusted: 100%|██████████| 1863/1863 [03:49<00:00,  8.13it/s]
Fearful: 100%|██████████| 2047/2047 [04:18<00:00,  7.92it/s]
Happy: 100%|██████████| 2167/2167 [03:50<00:00,  9.38it/s]
Neutral: 100%|██████████| 1795/1795 [03:21<00:00,  8.92it/s]
Sad: 100%|██████████| 2167/2167 [07:14<00:00,  4.98it/s]
Suprised: 100%|██████████| 592/592 [02:20<00:00,  4.22it/s]
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1204s[0m 4s/step - accuracy: 0.2384 - loss: 1.8622 - val_accuracy: 0.2327 - val_loss: 1.8885 - learning_rate: 3.0000e-04
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 623ms/step - accuracy: 0.2325 - loss: 1.8763




Test Accuracy: 0.23


In [None]:
#------------------ Training Pipeline ------------------

# Build the model
model = build_cnn_model(input_shape=X.shape[1:], num_classes=y.shape[1])
model.summary()

# Carve a validation set out of the training data. The callbacks below pick
# the best weights and the learning rate from `val_loss`; monitoring the test
# set instead would make the reported test accuracy optimistically biased.
# New names are used so re-running this cell does not shrink X_train.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.1, stratify=y_train, random_state=42)

# Callbacks
callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)
]

# Train the model
model.fit(
    X_fit, y_fit,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=callbacks
)

# Evaluate the model on the untouched test set
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc:.2f}")

# Save the model in the native Keras format. The format is inferred from the
# ".keras" extension; the former save_format="tf" argument contradicted it.
model.save("emotion_cnn_audio_only.keras")