In [67]:
#Importing the necessary libraries
import librosa 
import soundfile 
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import pandas as pd

In [2]:
#DataFlair - Extract features (mfcc, chroma, mel) from a sound file
def extract_feature(file_name, mfcc, chroma, mel):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate=sound_file.samplerate
        if chroma:
            stft=np.abs(librosa.stft(X))
            result=np.array([])
        if mfcc:
            mfccs=np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result=np.hstack((result, mfccs))
#         if chroma:
#             chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
#             result=np.hstack((result, chroma))
        if mel:
            mel=np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
            result=np.hstack((result, mel))
    return result

In [33]:
#DataFlair - Emotions in the RAVDESS dataset
people={
  '01':'yash',
  '02':'srihari',
  '03':'shikha',
  '04':'rahil',
}

In [57]:
#DataFlair - Load the data and extract features for each sound file
def load_data(test_size=0.2):
    x,y=[],[]
    for file in glob.glob("/Users/yashdange/Desktop/BE Project/Audio/Fixed Dataset/*.wav"):
        file_name=os.path.basename(file)
        person=people[file_name.split("-")[2]]
        feature=extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(person)
    return x,y

In [62]:
#DataFlair - Load the data and extract features for each sound file
def load_data2(test_size=0.2):
    x,y=[],[]
    for file in glob.glob("/Users/yashdange/Desktop/BE Project/Audio/Fixed Dataset/*.wav"):
        file_name=os.path.basename(file)
        person=people[file_name.split("-")[2]]
        feature=extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(person)
    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)

In [64]:
x_train,x_test,y_train,y_test=load_data2(test_size=0.25)

In [35]:
x,y=[],[]
for file in glob.iglob(r"/Users/yashdange/Desktop/BE Project/Audio/Fixed Dataset/*.wav"):
    file_name=os.path.basename(file)
    person=people[file_name.split("-")[2]]
    feature=extract_feature(file, mfcc=True, chroma=True, mel=True)
    x.append(feature)
    y.append(person)

In [72]:
#DataFlair - Initialize the Multi Layer Perceptron Classifier
model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500)

In [42]:
len(y)

20

In [45]:
#DataFlair - Train the model
model.fit(x,y)



MLPClassifier(alpha=0.01, batch_size=256, hidden_layer_sizes=(300,),
              learning_rate='adaptive', max_iter=500)

In [52]:
test_file = '/Users/yashdange/Desktop/BE Project/Audio/Test Refined/yash_zero.wav'
sample = extract_feature(test_file, mfcc=True, chroma=True, mel=True)

In [55]:
#DataFlair - Predict for the test set
y_pred=model.predict(sample.reshape(1,-1))
y_pred

array(['rahil'], dtype='<U7')

In [65]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
clf=GridSearchCV(SVC(gamma='auto'),{
    'C': [5,10,15],
    'kernel':['rbf','linear']
},cv=5,return_train_score=False)
clf.fit(x_train,y_train)
clf.cv_results_



{'mean_fit_time': array([0.11430731, 0.00048208, 0.00047646, 0.00046487, 0.00047636,
        0.00041909]),
 'std_fit_time': array([2.27643429e-01, 5.53889002e-05, 4.60160458e-05, 6.47074173e-05,
        8.29014004e-05, 2.12218787e-05]),
 'mean_score_time': array([0.00052452, 0.00024319, 0.00025115, 0.00026388, 0.00024095,
        0.00024104]),
 'std_score_time': array([5.53040723e-04, 1.63694800e-05, 2.60007895e-05, 3.55179352e-05,
        8.92411920e-06, 3.19879198e-05]),
 'param_C': masked_array(data=[5, 5, 10, 10, 15, 15],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 5, 'kernel': 'rbf'},
  {'C': 5, 'kernel': 'linear'},
  {'C': 10, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'linear'},
  {'C': 15, 'ker

In [68]:
df=pd.DataFrame(clf.cv_results_)
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.114307,0.227643,0.000525,0.000553,5,rbf,"{'C': 5, 'kernel': 'rbf'}",0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.0,4
1,0.000482,5.5e-05,0.000243,1.6e-05,5,linear,"{'C': 5, 'kernel': 'linear'}",1.0,0.666667,1.0,1.0,1.0,0.933333,0.133333,1
2,0.000476,4.6e-05,0.000251,2.6e-05,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.0,4
3,0.000465,6.5e-05,0.000264,3.6e-05,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,0.666667,1.0,1.0,1.0,0.933333,0.133333,1
4,0.000476,8.3e-05,0.000241,9e-06,15,rbf,"{'C': 15, 'kernel': 'rbf'}",0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.0,4
5,0.000419,2.1e-05,0.000241,3.2e-05,15,linear,"{'C': 15, 'kernel': 'linear'}",1.0,0.666667,1.0,1.0,1.0,0.933333,0.133333,1


In [69]:
df[['param_C','param_kernel','mean_test_score','rank_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score,rank_test_score
0,5,rbf,0.333333,4
1,5,linear,0.933333,1
2,10,rbf,0.333333,4
3,10,linear,0.933333,1
4,15,rbf,0.333333,4
5,15,linear,0.933333,1


In [71]:
model2=SVC()

In [74]:
model2.fit(x_train,y_train)

SVC()

In [76]:
model2.predict(sample.reshape(1,-1))

array(['yash'], dtype='<U7')

In [81]:
test_file2 = '/Users/yashdange/Desktop/BE Project/Audio/Test Refined/shikha_zero1.wav'
sample2 = extract_feature(test_file2, mfcc=True, chroma=True, mel=True)
sample2 = sample2.reshape(1,-1)

test_file3 = '/Users/yashdange/Desktop/BE Project/Audio/Test Refined/shikha_zero2.wav'
sample3 = extract_feature(test_file3, mfcc=True, chroma=True, mel=True)
sample3 = sample3.reshape(1,-1)

In [82]:
model.predict(sample2)

array(['shikha'], dtype='<U7')

In [84]:
model2.predict(sample2)

array(['yash'], dtype='<U7')

In [85]:
model.predict(sample3)

array(['shikha'], dtype='<U7')

In [87]:
model2.predict(sample3)

array(['yash'], dtype='<U7')

In [88]:
import tensorflow as tf
from tensorflow import keras

ModuleNotFoundError: No module named 'tensorflow'