In [1]:
#Importing the necessary libraries
import librosa 
import soundfile 
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import pandas as pd

In [6]:
#DataFlair - Extract features (mfcc, chroma, mel) from a sound file
def extract_feature(file_name, mfcc, chroma, mel):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate=sound_file.samplerate
        if chroma:
            stft=np.abs(librosa.stft(X))
            result=np.array([])
        if mfcc:
            mfccs=np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result=np.hstack((result, mfccs))
#         if chroma:
#             chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
#             result=np.hstack((result, chroma))
        if mel:
            mel=np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
            result=np.hstack((result, mel))
    return result

In [7]:
f=extract_feature('./Real Time Testing/Check.wav', mfcc=True, chroma=True, mel=True)

In [8]:
#DataFlair - Emotions in the RAVDESS dataset
people={
  '1':'Rahil',
  '2':'Shikha',
  '3':'Srihari',
  '4':'Yash',
}

In [9]:
#DataFlair - Load the data and extract features for each sound file
def load_data(test_size=0.2):
    x,y=[],[]
    for file in glob.glob("/Users/yashdange/Desktop/BE Project/Audio/Train 2 Fixed/*.wav"):
        file_name=os.path.basename(file)
        person=people[file_name.split("-")[1]]
        feature=extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(person)
    return x,y

In [10]:
x,y=[],[]
for file in glob.iglob(r"/Users/yashdange/Desktop/BE Project/Audio/Train 2 Fixed/*.wav"):
    file_name=os.path.basename(file)
    person=people[file_name.split("-")[1]]
    feature=extract_feature(file, mfcc=True, chroma=True, mel=True)
    x.append(feature)
    y.append(person)

In [11]:
#DataFlair - Initialize the Multi Layer Perceptron Classifier
model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500)

In [12]:
len(y)

60

In [13]:
#DataFlair - Train the model
model.fit(x,y)



MLPClassifier(alpha=0.01, batch_size=256, hidden_layer_sizes=(300,),
              learning_rate='adaptive', max_iter=500)

In [21]:
import pyaudio
import wave

FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
CHUNK = 512
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "./Real Time Testing/Check.wav"
device_index = 2
audio = pyaudio.PyAudio()

print("----------------------record device list---------------------")
info = audio.get_host_api_info_by_index(0)
numdevices = info.get('deviceCount')
for i in range(0, numdevices):
        if (audio.get_device_info_by_host_api_device_index(0, i).get('maxInputChannels')) > 0:
            print("Input Device id ", i, " - ", audio.get_device_info_by_host_api_device_index(0, i).get('name'))

print("-------------------------------------------------------------")

index = int(input())
print("recording via index "+str(index))

stream = audio.open(format=FORMAT, channels=CHANNELS,
                rate=RATE, input=True,input_device_index = index,
                frames_per_buffer=CHUNK)
print ("recording started")
Recordframes = []
 
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    Recordframes.append(data)
print ("recording stopped")
 
stream.stop_stream()
stream.close()
audio.terminate()
 
waveFile = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
waveFile.setnchannels(CHANNELS)
waveFile.setsampwidth(audio.get_sample_size(FORMAT))
waveFile.setframerate(RATE)
waveFile.writeframes(b''.join(Recordframes))
waveFile.close()

## TEST

test_file = './Real Time Testing/Check.wav'
sample = extract_feature(test_file, mfcc=True, chroma=True, mel=True)
sample=sample.reshape(1,-1)

y_pred=model.predict(sample.reshape(1,-1))
y_pred

print('The Speaker is:',y_pred)

----------------------record device list---------------------
Input Device id  0  -  MacBook Pro Microphone
Input Device id  2  -  Reincubate
Input Device id  3  -  Microsoft Teams Audio
Input Device id  4  -  EpocCam Microphone
Input Device id  5  -  ZoomAudioDevice
-------------------------------------------------------------
0
recording via index 0
recording started
recording stopped
The Speaker is:  ['Srihari']


In [16]:
test_file = './Real Time Testing/Check.wav'
sample = extract_feature(test_file, mfcc=True, chroma=True, mel=True)
sample=sample.reshape(1,-1)

In [20]:
#DataFlair - Predict for the test set
y_pred=model.predict(sample.reshape(1,-1))
y_pred

array(['Rahil'], dtype='<U7')

In [16]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
clf=GridSearchCV(SVC(gamma='auto'),{
    'C': [5,10,15],
    'kernel':['rbf','linear']
},cv=5,return_train_score=False)
clf.fit(x_train,y_train)
clf.cv_results_



{'mean_fit_time': array([0.00122628, 0.00066848, 0.00051751, 0.0005147 , 0.0004981 ,
        0.0004602 ]),
 'std_fit_time': array([1.13591224e-03, 1.48498100e-04, 5.93772365e-06, 4.73817323e-05,
        4.07006143e-05, 2.91559311e-05]),
 'mean_score_time': array([0.00037613, 0.00035901, 0.00029149, 0.0002748 , 0.00029345,
        0.00026522]),
 'std_score_time': array([4.74604146e-05, 7.78518113e-05, 4.98197856e-06, 1.35178078e-05,
        6.15741357e-05, 2.47315351e-05]),
 'param_C': masked_array(data=[5, 5, 10, 10, 15, 15],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 5, 'kernel': 'rbf'},
  {'C': 5, 'kernel': 'linear'},
  {'C': 10, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'linear'},
  {'C': 15, 'ker

In [17]:
df=pd.DataFrame(clf.cv_results_)
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001226,0.001136,0.000376,4.7e-05,5,rbf,"{'C': 5, 'kernel': 'rbf'}",0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.0,4
1,0.000668,0.000148,0.000359,7.8e-05,5,linear,"{'C': 5, 'kernel': 'linear'}",1.0,0.666667,1.0,1.0,1.0,0.933333,0.133333,1
2,0.000518,6e-06,0.000291,5e-06,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.0,4
3,0.000515,4.7e-05,0.000275,1.4e-05,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,0.666667,1.0,1.0,1.0,0.933333,0.133333,1
4,0.000498,4.1e-05,0.000293,6.2e-05,15,rbf,"{'C': 15, 'kernel': 'rbf'}",0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.0,4
5,0.00046,2.9e-05,0.000265,2.5e-05,15,linear,"{'C': 15, 'kernel': 'linear'}",1.0,0.666667,1.0,1.0,1.0,0.933333,0.133333,1


In [18]:
df[['param_C','param_kernel','mean_test_score','rank_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score,rank_test_score
0,5,rbf,0.333333,4
1,5,linear,0.933333,1
2,10,rbf,0.333333,4
3,10,linear,0.933333,1
4,15,rbf,0.333333,4
5,15,linear,0.933333,1


In [19]:
model2=SVC()

In [20]:
model2.fit(x_train,y_train)

SVC()

In [21]:
model2.predict(sample.reshape(1,-1))

array(['yash'], dtype='<U7')

In [22]:
test_file2 = '/Users/yashdange/Desktop/BE Project/Audio/Test Refined/shikha_zero1.wav'
sample2 = extract_feature(test_file2, mfcc=True, chroma=True, mel=True)
sample2 = sample2.reshape(1,-1)

test_file3 = '/Users/yashdange/Desktop/BE Project/Audio/Test Refined/shikha_zero2.wav'
sample3 = extract_feature(test_file3, mfcc=True, chroma=True, mel=True)
sample3 = sample3.reshape(1,-1)

In [23]:
#MLP
model.predict(sample2)

array(['shikha'], dtype='<U7')

In [24]:
#SVM
model2.predict(sample2)

array(['yash'], dtype='<U7')

In [25]:
model.predict(sample3)

array(['shikha'], dtype='<U7')

In [26]:
model2.predict(sample3)

array(['yash'], dtype='<U7')

In [None]:
import tensorflow as tf
from tensorflow import keras

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/opt/anaconda3/envs/IBM_Deep_Learning/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3427, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-31-3d1e6d42ad48>", line 1, in <module>
    import tensorflow as tf
  File "/opt/anaconda3/envs/IBM_Deep_Learning/lib/python3.7/site-packages/tensorflow/__init__.py", line 98, in <module>
    from tensorflow_core import *
  File "/opt/anaconda3/envs/IBM_Deep_Learning/lib/python3.7/site-packages/tensorflow_core/__init__.py", line 40, in <module>
    from tensorflow.python.tools import module_util as _module_util
  File "<frozen importlib._bootstrap>", line 983, in _find_and_load
  File "<frozen importlib._bootstrap>", line 959, in _find_and_load_unlocked
  File "/opt/anaconda3/envs/IBM_Deep_Learning/lib/python3.7/site-packages/tensorflow/__init__.py", line 50, in __getattr__
    module = self._load()
  File "/opt/anaconda3/envs/IBM_Deep_Learning/li

In [1]:
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
y_train_encoded= le.fit_transform(y_train)

NameError: name 'y_train' is not defined

In [32]:
y_train_encoded

array([1, 3, 3, 2, 1, 1, 3, 0, 2, 0, 3, 2, 3, 1, 0])

In [33]:
x_train.shape

(15, 168)

In [34]:
y_train_np = np.array(y_train)
y_train_np.shape

(15,)

In [None]:
#Simple neural network
model3 = keras.Sequential([
    keras.layers.Dense(100,input_shape=(168,), activation = 'sigmoid'),
    keras.layers.Dense(5, activation = 'sigmoid'),
    #keras.layers.Dense(5, activation = 'sigmoid'),
])

model3.compile(
    optimizer = 'adam',
    loss = 'sparse_categorical_crossentropy',
    metrics = ['accuracy']
)
model3.fit(x_train,y_train_encoded,epochs = 160)t

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



In [1]:
y_test_encoded = le.fit_transform(y_test)

NameError: name 'le' is not defined

In [1]:
prediction = np.argmax(model3.predict(sample2))
prediction = '0' + str(prediction+1)
person_prediction = people[prediction]
person_prediction

NameError: name 'np' is not defined

In [2]:
test_file4 = '/Users/yashdange/Desktop/BE Project/Audio/Test Refined/Rahil0_1.wav'
sample4 = extract_feature(test_file4, mfcc=True, chroma=True, mel=True)
sample4 = sample2.reshape(1,-1)

test_file5 = '/Users/yashdange/Desktop/BE Project/Audio/Test Refined/Rahil0_2.wav'
sample5 = extract_feature(test_file5, mfcc=True, chroma=True, mel=True)
sample5 = sample3.reshape(1,-1)

test_file6 = '/Users/yashdange/Desktop/BE Project/Audio/Test Refined/Rahil0_3.wav'
sample6 = extract_feature(test_file6, mfcc=True, chroma=True, mel=True)
sample6 = sample2.reshape(1,-1)

test_file7 = '/Users/yashdange/Desktop/BE Project/Audio/Test Refined/Rahil0_4.wav'
sample7 = extract_feature(test_file7, mfcc=True, chroma=True, mel=True)
sample7 = sample3.reshape(1,-1)

test_file8 = '/Users/yashdange/Desktop/BE Project/Audio/Test Refined/Rahil0_5.wav'
sample8 = extract_feature(test_file8, mfcc=True, chroma=True, mel=True)
sample8 = sample2.reshape(1,-1)

test_file9 = '/Users/yashdange/Desktop/BE Project/Audio/Test Refined/srihari_0.wav'
sample9 = extract_feature(test_file9, mfcc=True, chroma=True, mel=True)
sample9 = sample3.reshape(1,-1)

NameError: name 'extract_feature' is not defined

In [53]:
model2.predict(sample9)

array(['rahil'], dtype='<U7')