# **1. Load Necessary Libraries**

In [26]:
import librosa
import tensorflow as tf
import numpy as np

# Path of the trained Keras model file that the service factory loads.
SAVED_MODEL_PATH = "./keyword_model.h5"
# Number of audio samples kept per clip before MFCC extraction. 22050 equals
# librosa.load's default sampling rate, i.e. one second of audio.
SAMPLES_TO_CONSIDER = 22050

# **2. Build the Keyword Spotting Service**

In [27]:
class _Keyword_Spotting_Service:
    """Keyword spotting service: turns an audio file into a predicted keyword.

    The class keeps its state in class attributes (``model``, ``_instance``)
    that are assigned from outside the class body, so it is intended to be
    obtained through a factory rather than instantiated directly.
    """

    # Trained model exposing ``predict``; ``None`` until one is attached.
    model = None
    # Keyword labels, indexed by the argmax of the model's prediction vector.
    # NOTE(review): the order presumably mirrors the label encoding used at
    # training time -- confirm against the training pipeline.
    _mapping = [
        "down",
        "go",
        "stop",
        "up"
            ]
    # Shared instance slot used to implement the singleton.
    _instance = None


    def predict(self, file_path):
        """Predict the keyword spoken in an audio file.

        :param file_path: Path of the audio file to classify.
        :return: The entry of ``_mapping`` selected by the highest model score.
        :raises RuntimeError: If no model has been attached to the service.
        """
        if self.model is None:
            # Fail with an explicit message instead of an opaque
            # "'NoneType' object has no attribute 'predict'".
            raise RuntimeError(
                "No model loaded; obtain the service through its factory function."
            )

        # extract MFCC
        MFCCs = self.preprocess(file_path)

        # we need a 4-dim array to feed to the model for prediction:
        # (# samples, # time steps, # coefficients, 1)
        MFCCs = MFCCs[np.newaxis, ..., np.newaxis]

        # get the predicted label
        predictions = self.model.predict(MFCCs)
        predicted_index = np.argmax(predictions)
        predicted_keyword = self._mapping[predicted_index]
        return predicted_keyword


    def preprocess(self, file_path, num_mfcc=13, n_fft=2048, hop_length=512):
        """Extract a fixed-length MFCC matrix from an audio file.

        The signal is forced to exactly ``SAMPLES_TO_CONSIDER`` samples: longer
        clips are truncated and shorter clips are zero-padded at the end.
        Previously a short clip left ``MFCCs`` unassigned and the method
        crashed with ``UnboundLocalError``.

        :param file_path: Path of the audio file to load.
        :param num_mfcc: Number of MFCC coefficients to extract.
        :param n_fft: FFT window size, in samples.
        :param hop_length: Hop between successive frames, in samples.
        :return: Transposed MFCC matrix (frames along the first axis).
        """
        # load audio file
        signal, sample_rate = librosa.load(file_path)

        # ensure consistency of the length of the signal
        if len(signal) >= SAMPLES_TO_CONSIDER:
            signal = signal[:SAMPLES_TO_CONSIDER]
        else:
            # Trailing silence keeps the number of frames identical for every
            # clip, so the model always receives the same input shape.
            missing = SAMPLES_TO_CONSIDER - len(signal)
            signal = np.pad(signal, (0, missing), mode="constant")

        # extract MFCCs
        MFCCs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=num_mfcc, n_fft=n_fft,
                                     hop_length=hop_length)
        return MFCCs.T



In [28]:
def Keyword_Spotting_Service():
    """Return the shared keyword spotting service, creating it on first use.

    The first successful call loads the model from ``SAVED_MODEL_PATH`` and
    creates the single ``_Keyword_Spotting_Service`` instance; later calls
    return that same object.

    :return: The singleton ``_Keyword_Spotting_Service`` instance.
    """
    # ensure an instance is created only the first time the factory function is called
    if _Keyword_Spotting_Service._instance is None:
        # Load the model BEFORE publishing the instance: if loading raises,
        # ``_instance`` stays ``None`` and the next call retries, instead of
        # handing out a service whose ``model`` is still ``None`` forever.
        _Keyword_Spotting_Service.model = tf.keras.models.load_model(SAVED_MODEL_PATH)
        _Keyword_Spotting_Service._instance = _Keyword_Spotting_Service()
    return _Keyword_Spotting_Service._instance

# **3. Prediction**

In [29]:
# request the keyword spotting service twice through the factory
kss, kss1 = Keyword_Spotting_Service(), Keyword_Spotting_Service()
# singleton check: both names must be bound to the very same object
assert kss1 is kss

In [45]:
import pandas
# Read the evaluation table; the cells below use its `file_name` and `key` columns.
data = pandas.read_csv(filepath_or_buffer='key_audio_4.csv')
data

Unnamed: 0,file_name,key
0,./inputs/Voice 001.wav,up
1,./inputs/Voice 002.wav,down
2,./inputs/Voice 003.wav,go
3,./inputs/Voice 004.wav,up
4,./inputs/Voice 005.wav,go
5,./inputs/Voice 006.wav,down
6,./inputs/Voice 007.wav,up
7,./inputs/Voice 008.wav,stop
8,./inputs/Voice 009.wav,stop
9,./inputs/Voice 010.wav,go


In [46]:
def travis(row):
    """Predict the keyword for one row of the evaluation table.

    :param row: A row of ``data``; only its ``file_name`` field is read.
    :return: The keyword predicted by ``kss`` for that audio file.
    """
    return kss.predict(row.file_name)

# Build the column from the values returned by ``apply`` instead of appending
# to a global list as a side effect: the side-effect approach only works if
# the callback runs exactly once per row, and it leaves the list and the frame
# out of sync when the cell is interrupted part-way through.
out = data.apply(travis, axis=1)

# Keep ``prediction`` available as a plain list, as before.
prediction = out.tolist()
data['prediction'] = prediction
data



Unnamed: 0,file_name,key,prediction
0,./inputs/Voice 001.wav,up,up
1,./inputs/Voice 002.wav,down,stop
2,./inputs/Voice 003.wav,go,go
3,./inputs/Voice 004.wav,up,up
4,./inputs/Voice 005.wav,go,up
5,./inputs/Voice 006.wav,down,up
6,./inputs/Voice 007.wav,up,up
7,./inputs/Voice 008.wav,stop,stop
8,./inputs/Voice 009.wav,stop,stop
9,./inputs/Voice 010.wav,go,go


In [47]:
def sentence_accuracy(text1, text2):
    """Return the result of comparing ``text1`` with ``text2`` using ``==``."""
    is_match = text1 == text2
    return is_match

# Upper-case both labels of every row (prediction first, then key) so the
# comparison below is case-insensitive.
upper_pairs = (
    (predicted.upper(), expected.upper())
    for expected, predicted in zip(data['key'], data['prediction'])
)

# One boolean per row: does the prediction match the expected keyword?
accuracies = [
    sentence_accuracy(expected, predicted)
    for predicted, expected in upper_pairs
]

# Store the per-row result next to the labels it was computed from
data['sentence_level_accuracy'] = accuracies

# The mean of the boolean column is the fraction of correct predictions
average_accuracy = data['sentence_level_accuracy'].mean()

# Report the overall score as a percentage
print(f"Average Accuracy: {average_accuracy:.2%}")

Average Accuracy: 75.00%


In [48]:
data

Unnamed: 0,file_name,key,prediction,sentence_level_accuracy
0,./inputs/Voice 001.wav,up,up,True
1,./inputs/Voice 002.wav,down,stop,False
2,./inputs/Voice 003.wav,go,go,True
3,./inputs/Voice 004.wav,up,up,True
4,./inputs/Voice 005.wav,go,up,False
5,./inputs/Voice 006.wav,down,up,False
6,./inputs/Voice 007.wav,up,up,True
7,./inputs/Voice 008.wav,stop,stop,True
8,./inputs/Voice 009.wav,stop,stop,True
9,./inputs/Voice 010.wav,go,go,True
