In [4]:
import soundfile as sf
import numpy as np
import librosa
import glob
import os
import pickle
import tensorflow
from sklearn.model_selection import train_test_split 
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import speech_recognition as spr

Note: you may need to restart the kernel to use updated packages.


In [7]:
def feature_extraction(fileName,mfcc,chroma,mel):
    with sf.SoundFile(fileName) as file:
        sound = file.read(dtype='float32')
        sample_rate = file.samplerate     
        if chroma:         
            stft = np.abs(librosa.stft(sound))
        feature = np.array([])               
        if mfcc:
            mfcc = np.mean(librosa.feature.mfcc(y=sound,sr=sample_rate,n_mfcc=40).T,axis=0)
            feature =np.hstack((feature,mfcc))
        if chroma:
            chroma =  np.mean(librosa.feature.chroma_stft(S=stft,sr=sample_rate).T,axis=0)
            feature = np.hstack((feature,chroma))
        if mel:
            mel = np.mean(librosa.feature.melspectrogram(y=sound,sr=sample_rate).T,axis=0)
            feature =np.hstack((feature,mel))
        return feature 

In [8]:
int_emotion = {"01": "neutral", "02": "calm", "03": "happy", "04": "sad","05": "angry","06": "fearful","07": "disgust","08": "surprised"}
EMOTIONS = {"happy","sad","neutral","angry"}

In [13]:
def train_test_data(test_size=0.25):
    features, emotions = [],[] #intializing features and emotions
    for file in glob.glob("C:\\Users\\vmuku\\OneDrive\\Desktop\Data science\\data\\actor_*\\*.wav"):
        fileName = os.path.basename(file)  
        emotion = int_emotion[fileName.split("-")[2]] 
        if emotion not in EMOTIONS:
            continue
        feature=feature_extraction(file,mfcc=True,chroma=True,mel=True,) 
        features.append(feature)
        emotions.append(emotion)
    return train_test_split(np.array(features),emotions, test_size=test_size, random_state=7)

In [14]:
X_train,X_test,y_train,y_test=train_test_data(test_size=0.3)

In [15]:
model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(400,), learning_rate='adaptive', max_iter=1000)

In [16]:
model.fit(X_train,y_train)

MLPClassifier(alpha=0.01, batch_size=256, hidden_layer_sizes=(400,),
              learning_rate='adaptive', max_iter=1000)

In [17]:
y_pred = model.predict(X_test)

In [102]:
path = 'C:\\Users\\vmuku\\OneDrive\\Desktop\\Data science\\data'
r = sr.Recognizer()
filelist = []

for root, dirs, files in os.walk(path):
	for file in files:
       
		filelist.append(os.path.join(root,file))


for name in filelist:
    with sr.AudioFile(name) as source:
        audio_data = r.record(source)
        try:
            text = r.recognize_google(audio_data, language="en-IN")
            print(text)
        except sr.UnknownValueError:
            print("Google Speech Recognition could not understand audio")
            
           
            

talking by the door
kids talking by the door


KeyboardInterrupt: 

In [17]:
accuracy = accuracy_score(y_true=y_test,y_pred=y_pred)
accuracy

0.698019801980198

In [28]:
if not os.path.isdir("model"): 
   os.mkdir("model") 
pickle.dump(model, open("model/mlp_classifier.model", "wb"))