In [1]:
#Importing the necessary libraries
import librosa 
import soundfile 
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import pandas as pd
import pickle

In [2]:
#DataFlair - Extract features (mfcc, chroma, mel) from a sound file
def extract_feature(file_name, mfcc, chroma, mel):
    """Build a 1-D feature vector for one audio file.

    The file is read with ``soundfile`` and the requested feature groups are
    each averaged over time frames and concatenated, in the order MFCC then
    mel spectrogram.

    Parameters
    ----------
    file_name : str
        Path of the audio file to read.
    mfcc : bool
        Append the per-coefficient mean of 40 MFCCs.
    chroma : bool
        Accepted for interface compatibility. Chroma extraction is currently
        disabled (it was commented out in the original), so this flag does
        not change the returned vector. Re-enabling it would change the
        feature length and require retraining the model.
    mel : bool
        Append the per-band mean of the mel spectrogram.

    Returns
    -------
    numpy.ndarray
        1-D array of the selected features; empty when no group is selected.
    """
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # soundfile yields (frames, channels) for multi-channel files, while the
    # librosa feature functions expect the samples on the last axis. Downmix
    # to mono so stereo recordings are handled instead of being misread.
    if X.ndim > 1:
        X = X.mean(axis=1)

    # Always start from an empty vector. Previously this was initialised only
    # inside `if chroma:`, so chroma=False raised NameError further down.
    result = np.array([])

    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfccs))

    # Chroma features are intentionally not computed (see docstring); the
    # unused STFT that used to be calculated for them has been dropped.

    if mel:
        # The audio must be passed as `y=`: recent librosa releases make the
        # arguments of melspectrogram keyword-only.
        mel_mean = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        result = np.hstack((result, mel_mean))
    return result

In [3]:
#People and their assigned labels
# Lookup table from the label token found in a file name to the speaker's name.
people = dict([
    ('1', 'Rahil'),
    ('2', 'Shikha'),
    ('3', 'Srihari'),
    ('4', 'Yash'),
])

In [4]:
#DataFlair - Load the data and extract features for each sound file
def load_data(test_size=0.2, data_dir="/Users/yashdange/Desktop/BE Project/Audio/Train 2 Fixed"):
    """Load every ``*.wav`` file in ``data_dir`` as a (features, label) pair.

    The speaker label is taken from the second ``-``-separated token of the
    file name and looked up in the module-level ``people`` mapping.

    Parameters
    ----------
    test_size : float
        Currently unused: no train/test split is performed here. Kept so
        existing calls keep working.
    data_dir : str
        Directory containing the training recordings. Defaults to the
        original hard-coded location so ``load_data()`` behaves as before.

    Returns
    -------
    (list, list)
        ``x`` holds one feature vector per file and ``y`` the matching
        speaker names, both in sorted file-name order.

    Raises
    ------
    IndexError
        If a file name contains no ``-`` separator.
    KeyError
        If the label token is not a key of ``people``.
    """
    x, y = [], []
    # glob.escape keeps directory names with glob metacharacters literal.
    # Sorting makes the sample order deterministic, because glob returns
    # files in arbitrary order.
    pattern = os.path.join(glob.escape(data_dir), "*.wav")
    for file in sorted(glob.glob(pattern)):
        file_name = os.path.basename(file)
        person = people[file_name.split("-")[1]]
        feature = extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(person)
    return x, y

In [5]:
# Build the training set with load_data() rather than repeating its loop
# here: x holds one feature vector per recording, y the matching speaker name.
x, y = load_data()

In [6]:
#DataFlair - Initialize the Multi Layer Perceptron Classifier
# random_state fixes the weight initialisation and batch shuffling so that a
# Restart & Run All reproduces the same trained model and predictions.
# NOTE(review): per the scikit-learn docs, learning_rate='adaptive' only takes
# effect with solver='sgd'; with the default solver it is ignored.
# NOTE(review): batch_size=256 is larger than the 60 training samples counted
# in a later cell, so scikit-learn is expected to clip it (with a warning).
model = MLPClassifier(
    alpha=0.01,
    batch_size=256,
    epsilon=1e-08,
    hidden_layer_sizes=(300,),
    learning_rate='adaptive',
    max_iter=500,
    random_state=42,
)

In [7]:
# Sanity check: display how many labels (one per loaded recording) are in y.
len(y)

60

In [8]:
# Fit the classifier on the extracted feature vectors (x) and speaker names (y).
# fit() returns the estimator, so the cell displays its repr.
model.fit(X=x, y=y)



MLPClassifier(alpha=0.01, batch_size=256, hidden_layer_sizes=(300,),
              learning_rate='adaptive', max_iter=500)

In [10]:
import pyaudio
import wave

# --- Recording configuration -------------------------------------------------
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
CHUNK = 512
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "./Real Time Testing/Check.wav"
device_index = 2  # NOTE(review): not used below; the device is chosen interactively
audio = pyaudio.PyAudio()

# --- Record from a user-selected input device --------------------------------
# The outer try/finally guarantees PortAudio is released even if device
# selection or recording fails; the inner one does the same for the stream.
try:
    print("----------------------record device list---------------------")
    info = audio.get_host_api_info_by_index(0)
    numdevices = info.get('deviceCount')
    for i in range(0, numdevices):
        # Query each device once instead of twice per iteration.
        device_info = audio.get_device_info_by_host_api_device_index(0, i)
        if device_info.get('maxInputChannels') > 0:
            print("Input Device id ", i, " - ", device_info.get('name'))

    print("-------------------------------------------------------------")

    index = int(input())
    print("recording via index "+str(index))

    stream = audio.open(format=FORMAT, channels=CHANNELS,
                        rate=RATE, input=True, input_device_index=index,
                        frames_per_buffer=CHUNK)
    try:
        print("recording started")
        Recordframes = []

        for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
            data = stream.read(CHUNK)
            Recordframes.append(data)
        print("recording stopped")
    finally:
        stream.stop_stream()
        stream.close()

    # Query the sample width while the PyAudio instance is still alive,
    # rather than after terminate() as before.
    sample_width = audio.get_sample_size(FORMAT)
finally:
    audio.terminate()

# --- Save the recording ------------------------------------------------------
# Create the target folder if needed so wave.open does not fail on a fresh
# checkout; the context manager closes the file even if writing raises.
os.makedirs(os.path.dirname(WAVE_OUTPUT_FILENAME), exist_ok=True)
with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as waveFile:
    waveFile.setnchannels(CHANNELS)
    waveFile.setsampwidth(sample_width)
    waveFile.setframerate(RATE)
    waveFile.writeframes(b''.join(Recordframes))

## TEST

# Classify the clip that was just written (same path as the recorder output).
test_file = WAVE_OUTPUT_FILENAME
sample = extract_feature(test_file, mfcc=True, chroma=True, mel=True)
sample = sample.reshape(1, -1)  # predict() expects a 2-D (n_samples, n_features) array

y_pred = model.predict(sample)

print('The Speaker is:', y_pred)

----------------------record device list---------------------
Input Device id  0  -  MacBook Pro Microphone
Input Device id  2  -  Reincubate
Input Device id  3  -  Microsoft Teams Audio
Input Device id  4  -  EpocCam Microphone
Input Device id  5  -  VB-Cable
Input Device id  6  -  ZoomAudioDevice
-------------------------------------------------------------
0
recording via index 0
recording started
recording stopped
The Speaker is: ['Rahil']


In [11]:
# Classify a previously saved clip; the bare last expression displays the
# predicted label as the cell output.
test = extract_feature(
    './Test 2 fixed/rag.wav', mfcc=True, chroma=True, mel=True
).reshape(1, -1)  # single sample -> one row of features
model.predict(test)

array(['Rahil'], dtype='<U7')