In [2657]:
import os
Root = "./Ravdess_data"
os.chdir(Root)
import librosa
import soundfile
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score


#Extract features (mfcc, chroma, mel) from a soundfile

In [2658]:

def extract_features(file_name, mfcc, chroma,mel):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate=sound_file.samplerate
        if chroma:
            stft=np.abs(librosa.stft(X))
        result=np.array([])
        if mfcc:
            mfccs=np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result= np.hstack((result, mfccs))
        if chroma:
            chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
            result=np.hstack( (result, chroma))
        if mel:
            mel=np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)
            result=np.hstack((result, mel))
    return result


# Emotions in the RAVDESS dataset

In [2659]:

emotions={
    '01':'neutral',
    '02':'calm',
    '03':'happy',
    '04':'sad',
    '05':'angry',
    '06':'fearful',
    '07':'disgust',
    '08':'surprised',
}

emotions to observe

In [2660]:
observed_emotions=['neutral', 'happy', 'fearful', 'sad']

Load the data and extract features for each sound file

In [2661]:
def load_data(test_size=0.2):
    x,y=[],[]
    for file in glob.glob("D:/Code/Ravdess_data/Actor_*/*.wav"):
        file_name=os.path.basename(file)
        emotion=emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        feature=extract_features(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(emotion)
    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)

Split the dataset

In [2662]:
x_train,x_test,y_train,y_test=load_data(test_size=0.25)

  mel=np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)
  0.0000000e+00 -3.0517578e-05] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel=np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)
 0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel=np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)
 0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel=np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)
 0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel=np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)
 0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel=np.mean(librosa.feature.melspectrogram

In [2663]:
x_train

array([[-6.94892578e+02,  4.35764313e+01, -1.57894535e+01, ...,
         3.11573640e-05,  2.71380668e-05,  1.23528062e-05],
       [-6.18971680e+02,  6.65267563e+01,  2.25760603e+00, ...,
         9.56115327e-05,  6.01734901e-05,  3.39367944e-05],
       [-4.58362427e+02,  7.41205902e+01, -2.46246853e+01, ...,
         7.25394857e-05,  4.16039074e-05,  2.20797465e-05],
       ...,
       [-6.18532654e+02,  3.48936996e+01, -6.92095470e+00, ...,
         1.51414803e-04,  1.04732688e-04,  7.10216918e-05],
       [-7.00413757e+02,  7.17063370e+01,  1.32325602e+01, ...,
         2.94743245e-06,  1.03271952e-06,  6.27505983e-07],
       [-5.99402588e+02,  3.05057697e+01, -6.70744085e+00, ...,
         2.11430626e-04,  3.26338486e-04,  1.30003464e-04]])

Get the shape of the training and testing datasets

In [2664]:
print((x_train.shape[0], x_test.shape[0]))

(504, 168)


Get the number of features extracted

In [2665]:
print(f'features extracted: {x_train.shape[1]}')

features extracted: 180


Initialize the Multi Layer Perception Classifier

In [2666]:
model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=600)

Train the model

In [2667]:
model.fit(x_train, y_train)



MLPClassifier(alpha=0.01, batch_size=256, hidden_layer_sizes=(300,),
              learning_rate='adaptive', max_iter=600)

Predict for the set

In [2668]:
y_pred=model.predict(x_test)

In [2669]:
y_pred

array(['neutral', 'neutral', 'sad', 'neutral', 'fearful', 'fearful',
       'happy', 'happy', 'sad', 'happy', 'sad', 'happy', 'happy', 'happy',
       'sad', 'fearful', 'neutral', 'neutral', 'sad', 'neutral', 'happy',
       'sad', 'sad', 'fearful', 'happy', 'happy', 'fearful', 'fearful',
       'neutral', 'fearful', 'neutral', 'happy', 'neutral', 'happy',
       'sad', 'sad', 'fearful', 'neutral', 'happy', 'sad', 'happy',
       'happy', 'happy', 'happy', 'happy', 'sad', 'sad', 'sad', 'fearful',
       'fearful', 'fearful', 'fearful', 'sad', 'neutral', 'happy', 'sad',
       'happy', 'neutral', 'neutral', 'sad', 'fearful', 'neutral', 'sad',
       'fearful', 'happy', 'sad', 'happy', 'fearful', 'sad', 'sad', 'sad',
       'fearful', 'neutral', 'sad', 'neutral', 'fearful', 'fearful',
       'neutral', 'happy', 'fearful', 'happy', 'fearful', 'happy', 'sad',
       'neutral', 'happy', 'sad', 'neutral', 'fearful', 'fearful',
       'fearful', 'fearful', 'fearful', 'sad', 'neutral', 'sad',


Calculate the accuracy of our model

In [2670]:
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)

Print the accuracy

In [2671]:
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 67.26%


In [2672]:
from sklearn.metrics import accuracy_score, f1_score

In [2673]:
f1_score(y_test, y_pred,average=None)

array([0.67368421, 0.6741573 , 0.74074074, 0.63265306])

In [2674]:
import pandas as pd
df=pd.DataFrame({'Actual': y_test, 'Predicted':y_pred})
df.head(20)

Unnamed: 0,Actual,Predicted
0,neutral,neutral
1,sad,neutral
2,sad,sad
3,sad,neutral
4,fearful,fearful
5,happy,fearful
6,happy,happy
7,sad,happy
8,fearful,sad
9,happy,happy


In [2675]:
import pickle
#Writing different model files to file
filename = 'modelForPrediction1'
pickle.dump(model,open(filename,'wb'))
    

loaded_model = pickle.load(open(filename, 'rb'))  #loading the model file from the storage

In [2676]:
feature=extract_features("D:/Code/Ravdess_data/Actor_15/03-01-01-01-01-01-15.wav", mfcc=True, chroma=True, mel=True)

  mel=np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)


In [2677]:
feature=feature.reshape(1,-1)

In [2678]:
prediction=loaded_model.predict(feature)
print(prediction[0])

neutral


In [2679]:
feature

array([[-6.23513062e+02,  6.43129349e+01, -5.59814835e+00,
         1.84341869e+01,  2.30003214e+00, -4.21763003e-01,
        -8.61682594e-01, -4.63177729e+00, -2.86865282e+00,
        -1.13252413e+00, -1.95075050e-01, -2.14656666e-01,
         1.18698037e+00, -1.87612861e-01,  1.73329294e+00,
        -9.19283032e-01,  7.97187805e-01,  3.95105267e+00,
        -2.44392705e+00,  1.64568031e+00, -1.62316144e+00,
        -5.03214836e-01, -1.82159078e+00,  7.70089269e-01,
        -1.59531474e+00,  3.54147166e-01,  1.55689254e-01,
         1.98439014e+00, -8.25566709e-01,  1.21198082e+00,
        -1.08110714e+00,  5.59319735e-01, -5.36373593e-02,
         8.65986466e-01,  7.40592122e-01,  6.04718089e-01,
         6.09368086e-01,  1.79395831e+00,  3.10060549e+00,
         2.60126996e+00,  6.65221095e-01,  7.25663066e-01,
         7.58910656e-01,  7.29048848e-01,  7.04039514e-01,
         7.26167202e-01,  7.17872620e-01,  7.76633263e-01,
         7.29013205e-01,  7.40402579e-01,  7.67991066e-0