# Keyword spotting from Speech

### Import relevant packages/modules

In [21]:
import os
import pkg_resources
from pydub import AudioSegment
from pydub.silence import split_on_silence
import speech_recognition as sr
from symspellpy import SymSpell, Verbosity
from flashtext import KeywordProcessor

In [8]:
# Reset the pipeline state and clear slice files left over from a previous run
Speech = None
slices = None
slice_filenames = []

slice_dir = './speech-slices'

if os.path.isdir(slice_dir):
    for doc in os.listdir(slice_dir):
        stale_path = os.path.join(slice_dir, doc)
        # os.remove() only works on files; skip anything else (e.g. a
        # sub-directory) instead of aborting the whole cell.
        if os.path.isfile(stale_path):
            os.remove(stale_path)

# Open the audio file using pydub
path = os.path.join(os.curdir, 'eng.wav')
Speech = AudioSegment.from_wav(path)

# Split the speech wherever there is a gap of at least 500 milliseconds,
# keeping 500 ms of silence at the edges of every slice.
# NOTE(review): the silence threshold is the clip's own dBFS loudness value;
# confirm this is intended rather than an offset below it.
slices = split_on_silence(Speech,
    min_silence_len=500,
    silence_thresh=Speech.dBFS,
    keep_silence=500)

# Make sure the folder that stores the exported slices exists
os.makedirs(slice_dir, exist_ok=True)

# Export every slice as its own WAV file and remember the paths, in order.
# The loop variable is deliberately not called `slice`, which would shadow the
# builtin for every later cell run in the same kernel.
for i, chunk in enumerate(slices, start=1):
    slice_filename = os.path.join(slice_dir, f'speech_slice{i}.wav')
    chunk.export(slice_filename, format="wav")
    slice_filenames.append(slice_filename)

In [11]:
# Initialize the recognizer and the containers for the transcription results:
#   text_corpus - all recognized sentences concatenated into one string
#   textMap     - slice filename -> recognized sentence
asr = sr.Recognizer()
text_corpus = ""
textMap = {}

# Convert the audio slices to text. Slices that cannot be transcribed are
# reported and left out of textMap.
for filename in slice_filenames:
    # Read the whole slice into memory, then close the file before the
    # (potentially slow) recognition request is made.
    with sr.AudioFile(filename) as source:
        audio_source = asr.record(source)

    try:
        text = asr.recognize_google(audio_source)
    except sr.UnknownValueError:
        # The exception carries no message of its own, so name the slice.
        print("Error Occurred: ", f"speech could not be recognized in {filename}")
    except sr.RequestError as e:
        # The recognition request itself failed; report it and move on to the
        # next slice rather than aborting the whole loop with a traceback.
        print("Error Occurred: ", f"recognition request failed for {filename}: {e}")
    else:
        text = f"{text.capitalize()}. "
        text_corpus += text
        textMap[filename] = text


Error Occurred:  
Error Occurred:  
Error Occurred:  
Error Occurred:  


In [12]:
# Show the recognized sentence stored for each slice file. Slices whose
# recognition raised an error were never added to textMap, so they are absent.
print(textMap)

{'./speech-slices\\speech_slice2.wav': 'Do you like manchester united. ', './speech-slices\\speech_slice3.wav': 'What is uk. ', './speech-slices\\speech_slice4.wav': 'I having fun. ', './speech-slices\\speech_slice5.wav': 'Yeah. ', './speech-slices\\speech_slice7.wav': 'What do you like the bass. ', './speech-slices\\speech_slice8.wav': 'Devika. ', './speech-slices\\speech_slice9.wav': 'The people here are. ', './speech-slices\\speech_slice10.wav': 'You like this. ', './speech-slices\\speech_slice11.wav': 'Yeah. ', './speech-slices\\speech_slice12.wav': 'Where is great. ', './speech-slices\\speech_slice13.wav': 'Is this field too spicy for you. ', './speech-slices\\speech_slice15.wav': 'I love. ', './speech-slices\\speech_slice17.wav': 'Everyday. ', './speech-slices\\speech_slice18.wav': 'How much longer you stay. ', './speech-slices\\speech_slice19.wav': 'When you go. '}


In [18]:
# Perform spell correction on every transcribed slice.
# spellCorrectedTxtMap: slice filename -> corrected sentence.
spellCorrectedTxtMap = {}
sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)
dictionary_path = pkg_resources.resource_filename("symspellpy", "frequency_dictionary_en_82_765.txt")
bigram_path = pkg_resources.resource_filename("symspellpy", "frequency_bigramdictionary_en_243_342.txt")

# Load both dictionaries exactly once, before the loop: the files do not depend
# on the sentence being corrected, and parsing them per sentence is wasted work.
# NOTE(review): re-loading the same file also appears to add to the word counts
# already stored in the SymSpell instance - confirm against symspellpy.
if not sym_spell.load_dictionary(dictionary_path, term_index=0, count_index=1):
    raise FileNotFoundError(f"SymSpell dictionary not found: {dictionary_path}")
if not sym_spell.load_bigram_dictionary(bigram_path, term_index=0, count_index=2):
    raise FileNotFoundError(f"SymSpell bigram dictionary not found: {bigram_path}")

for val, text in textMap.items():
    suggestions = sym_spell.lookup_compound(text, max_edit_distance=2)

    # Keep the term of the first suggestion as the corrected sentence.
    spellCorrectedTxtMap[val] = str(suggestions[0].term)
    

In [25]:
# Print every spell-corrected sentence on its own line, in insertion order
for corrected_sentence in spellCorrectedTxtMap.values():
    print(corrected_sentence)

do you like manchester united
what is us
i having fun
yeah
what do you like the bass
devi a
the people here are
you like this
yeah
where is great
is this field too spicy for you
i love
everyday
how much longer you stay
when you go


In [22]:
class AddMultiKeywords:
    """Run a flashtext keyword search over a single piece of text.

    Attributes:
        text: The text to search.
        keyword_dict: Dictionary handed to
            ``KeywordProcessor.add_keywords_from_dict``.
    """

    def __init__(self, text, keyword_dict):
        """Store the text and the keyword dictionary for a later search."""
        self.text, self.keyword_dict = text, keyword_dict

    def addkey(self):
        """Register the keywords and return what ``extract_keywords`` finds.

        A new ``KeywordProcessor`` is created on every call.

        Returns:
            The result of ``KeywordProcessor.extract_keywords`` for ``self.text``.
        """
        processor = KeywordProcessor()
        processor.add_keywords_from_dict(self.keyword_dict)
        return processor.extract_keywords(self.text)

In [23]:
# Keyword groups to look for, in the form passed to AddMultiKeywords
keyword_groups = {
    "place": ["england"],
    "team": ["manchester united"],
    "game": ["football"],
}

# extractedKeywordMap: slice filename -> keywords extracted from its transcript.
# NOTE(review): this reads the raw transcripts in textMap, not the
# spell-corrected ones in spellCorrectedTxtMap - confirm that is intended.
extractedKeywordMap = {}
for slice_path, transcript in textMap.items():
    extractor = AddMultiKeywords(transcript, keyword_groups)
    extractedKeywordMap[slice_path] = extractor.addkey()

In [24]:
# Show, for every transcribed slice, the list of keywords extracted from it
# (an empty list means nothing was extracted for that slice).
print(extractedKeywordMap)

{'./speech-slices\\speech_slice2.wav': ['team'], './speech-slices\\speech_slice3.wav': [], './speech-slices\\speech_slice4.wav': [], './speech-slices\\speech_slice5.wav': [], './speech-slices\\speech_slice7.wav': [], './speech-slices\\speech_slice8.wav': [], './speech-slices\\speech_slice9.wav': [], './speech-slices\\speech_slice10.wav': [], './speech-slices\\speech_slice11.wav': [], './speech-slices\\speech_slice12.wav': [], './speech-slices\\speech_slice13.wav': [], './speech-slices\\speech_slice15.wav': [], './speech-slices\\speech_slice17.wav': [], './speech-slices\\speech_slice18.wav': [], './speech-slices\\speech_slice19.wav': []}
