In [62]:
import os
import librosa
# 
import pandas as pd
import numpy as np
from numpy import mean
from numpy import std
from matplotlib import pyplot
# 
from keras.layers import Dense
from keras.models import Sequential
from keras.utils import np_utils
# 
from sklearn import preprocessing, svm
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report

In [4]:
Tess = "Tess"

tess_directory_list = os.listdir(Tess)

file_emotion = []
file_path = []

for file in tess_directory_list:
    part = file.split('.')[0].split('_')[2]
    if part=='ps':
        file_emotion.append('surprise')
    else:
        file_emotion.append(part)
        
    file_path.append(Tess + '/' + file)
        
# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['Path'])
Tess_df = pd.concat([emotion_df, path_df], axis=1)
Tess_df


Unnamed: 0,Emotions,Path
0,angry,Tess/OAF_back_angry.wav
1,disgust,Tess/OAF_back_disgust.wav
2,fear,Tess/OAF_back_fear.wav
3,happy,Tess/OAF_back_happy.wav
4,neutral,Tess/OAF_back_neutral.wav
...,...,...
2795,fear,Tess/YAF_youth_fear.wav
2796,happy,Tess/YAF_youth_happy.wav
2797,neutral,Tess/YAF_youth_neutral.wav
2798,surprise,Tess/YAF_youth_ps.wav


In [5]:
# extract features from data
mfcc_audios = []

for path in Tess_df['Path']:
    x , sr = librosa.load(path, sr=None)
    mfcc = np.mean(librosa.feature.mfcc(y=x, sr=sr).T, axis=0)
    
    mfcc_audios.append(mfcc)
    


In [6]:
mfcc_audios = np.array(mfcc_audios)
mfcc_audios

array([[-3.9003134e+02,  5.5235970e+01, -1.5034650e+01, ...,
        -3.2613571e+00, -3.6698551e+00, -4.0404305e+00],
       [-4.8661041e+02,  9.0023979e+01,  1.3257611e+01, ...,
        -5.3382254e-01,  1.4373827e+00, -1.4682710e+00],
       [-4.3724438e+02,  6.0606316e+01,  4.7354970e+00, ...,
        -3.0597289e+00,  7.0483537e+00, -3.2661567e+00],
       ...,
       [-4.1281674e+02,  6.2643398e+01,  8.9996767e+00, ...,
         9.0151281e+00, -1.3266459e+01, -4.3487201e+00],
       [-3.7355783e+02,  3.8703201e+01, -3.6885612e+00, ...,
         7.1812916e+00, -6.4076841e-02,  1.3013121e+00],
       [-4.1227975e+02,  5.3551804e+01,  3.1432125e+01, ...,
        -7.1320337e-01, -5.9225287e+00, -2.3588972e+00]], dtype=float32)

In [16]:
def get_delta_features(mfccs):
    delta_mfccs = librosa.feature.delta(mfccs)
    delta2_mfccs = librosa.feature.delta(mfccs, order=2)
    mfccs_features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs),axis=1)
    return mfccs_features

mfcc_audios = get_delta_features(mfcc_audios)
mfcc_audios.shape


(2800, 180)

In [17]:
# scale features
scaler = preprocessing.MaxAbsScaler()
X = scaler.fit_transform(mfcc_audios)


array([[-6.6538781e-01,  4.5902681e-01, -3.0711496e-01, ...,
        -1.9879920e-04, -1.9879920e-04, -1.9879920e-04],
       [-8.3015031e-01,  7.4812520e-01,  2.7081513e-01, ...,
         1.5049429e-01,  1.5049429e-01,  1.5049429e-01],
       [-7.4593258e-01,  5.0365597e-01,  9.6732676e-02, ...,
         3.0856597e-01,  3.0856597e-01,  3.0856597e-01],
       ...,
       [-7.0425940e-01,  5.2058470e-01,  1.8383768e-01, ...,
         2.6869603e-02,  2.6869603e-02,  2.6869603e-02],
       [-6.3728422e-01,  3.2163474e-01, -7.5346768e-02, ...,
        -5.9018925e-02, -5.9018925e-02, -5.9018925e-02],
       [-7.0334327e-01,  4.4503093e-01,  6.4206851e-01, ...,
        -9.7627394e-02, -9.7627394e-02, -9.7627394e-02]], dtype=float32)

In [56]:
y = np.array(Tess_df['Emotions'])

# dimensionality reduction
X_dr = PCA(n_components=30).fit_transform(X,y)


In [57]:
X_train, X_test, y_train, y_test = train_test_split(X_dr, y, test_size=0.2)

In [63]:
# # using logistic regression
# clf = LogisticRegression(multi_class='auto', solver='lbfgs', max_iter=1000)

# # using decision tree 
# clf = DecisionTreeClassifier()

# Using support vector machine classifier
clf2 = svm.SVC(C=3)

# using k-nearest-neighbours
# clf = KNeighborsClassifier()
# clf.fit(X_train, y_train)

# #  using bagging ensemble learning
clf3 = BaggingClassifier(estimator=svm.SVC(C=3),n_estimators=20, random_state=0)

# using boosting ensemble
# clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)
clf1 = HistGradientBoostingClassifier(max_iter=100)
# # using voting 
clf = VotingClassifier(estimators=[('hgbc', clf1), ('svc', clf2), ('bgclf', clf3)], voting='hard')


# using stacking ensemble learning
# def get_stacking():
#     # define the base models
#     level0 = list()

#     level0.append(('knn', KNeighborsClassifier()))
#     level0.append(('svc', svm.SVC(C=3)))
#     level0.append(('bagclf', BaggingClassifier(estimator=svm.SVC(C=3),n_estimators=20, random_state=0)))
#     level0.append(('vboostclf', VotingClassifier(estimators=[('hgbc', clf1), ('svc', clf2), ('bgclf', clf3)], voting='hard')))
#     # define meta learner model
#     level1 = svm.SVC(C=3)
#     # define the stacking ensemble
#     model = StackingClassifier(estimators=level0, final_estimator=level1, cv=5)
#     return model
# clf = get_stacking()



clf.fit(X_train, y_train)
scores = cross_val_score(clf, X_train, y_train, cv=10)
print("%0.4f accuracy with a standard deviation of %0.4f" % (mean(scores), std(scores)))

y_pred = clf.predict(X_test)
report = classification_report(y_test, y_pred)
print("Classification Report:\n", report)

Accuracy: 0.9892857142857143
Classification Report:
               precision    recall  f1-score   support

       angry       1.00      0.99      0.99        91
     disgust       0.99      1.00      0.99        82
        fear       0.99      0.99      0.99        71
       happy       0.99      0.96      0.97        80
     neutral       0.99      1.00      0.99        85
         sad       1.00      1.00      1.00        77
    surprise       0.97      0.99      0.98        74

    accuracy                           0.99       560
   macro avg       0.99      0.99      0.99       560
weighted avg       0.99      0.99      0.99       560



In [None]:
# y_pred = clf.predict(X_test)
# accuracy = accuracy_score(y_test, y_pred)


# print("Accuracy:", accuracy)
# print("Classification Report:\n", report)

# # # plot model performance for comparison
# pyplot.boxplot(results, labels=names, showmeans=True)
# pyplot.show()

In [18]:
# # using neural networks
# encoder = LabelEncoder()
# encoder.fit(y)
# encoded_y = encoder.transform(y)
# dummy_y = np_utils.to_categorical(encoded_y)
# encoded_y