In [79]:
import librosa #Librairie utilisé pour analyser les audios
import soundfile #Librairie servant à lire et écrire des fichiers audios
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [80]:
# Fonction pour extraire les fonctionnalités d'un fichier audio
def extraire_fonctionnalites(nom_fichier, mfcc, chroma, mel):
    X, sample_rate = librosa.load(nom_fichier, sr=None)
    if chroma:
        stft = np.abs(librosa.stft(X))
    result = np.array([])
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfccs))
    if chroma:
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, chroma))
    if mel:
        mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        result = np.hstack((result, mel))
    return result

In [81]:
emotions = {
    '01' :'neutral',
    '02' : 'calm',
    '03' : 'happy',
    '04' : 'sad',
    '05' : 'angry',
    '06' : 'fearful',
    '07' : 'disgust',
    '08' : 'surprised'
}
observed_emotions = ['calm', 'happy', 'fearful', 'disgust']

In [82]:
def charger_donnees(taille_test=0.2):
    x, y = [], []
    for fichier in glob.glob("/Users/yilmaz-a/Documents/Computer Science/SER_file/speech-emotion-recognition-ravdess-data/Actor_*/*.wav"):
        nom_fichier = os.path.basename(fichier)
        emotion = emotions[nom_fichier.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        fonctionnalite = extraire_fonctionnalites(fichier, mfcc=True, chroma=True, mel=True)
        x.append(fonctionnalite)
        y.append(emotion)
    return train_test_split(np.array(x), y, test_size=taille_test, random_state=9)

In [83]:
x_train, x_test, y_train, y_test = charger_donnees(taille_test=0.25)

In [84]:
print((x_train.shape[0], x_test.shape[0]))


(576, 192)


In [85]:
print(f'Features extracted: {x_train.shape[1]}')


Features extracted: 180


In [86]:
model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(550,), learning_rate='adaptive', max_iter=500)

In [87]:
model.fit(x_train,y_train)


In [88]:
y_pred=model.predict(x_test)


In [89]:
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)


In [90]:
print("Accuracy: {:.2f}%".format(accuracy*100))


Accuracy: 61.98%
