## Speech-to-Text Test

Credit to Tommy Falgout, who has a repo outlining how to implement speech recognition using microphone input:
https://github.com/Uberi/speech_recognition/blob/master/examples/microphone_recognition.py

This is a preliminary test of the SpeechRecognition library and of Google's speech recognition service, which the library calls through `recognize_google`.

In [1]:
import speech_recognition as sr
from tensorflow.keras.models import Sequential, load_model
import numpy as np
import pandas as pd
import plotly.express as px

In [3]:
# Load the saved Keras model that combine_predict applies to the spaCy sentence-vector input.
model_lr = load_model('../data/model_l.hd/')

In [4]:
# Load the saved Keras model that combine_predict applies to the VADER polarity-score features.
model_sr = load_model('../data/model_s.hd/')

In [5]:
import numpy as np
import spacy
from sklearn.base import BaseEstimator, TransformerMixin

class WordVectorTransformer(TransformerMixin, BaseEstimator):
    """Scikit-learn style transformer that turns each text into its spaCy ``Doc.vector``.

    Parameters
    ----------
    model : str
        Name of the spaCy pipeline passed to ``spacy.load``. Defaults to
        ``"en_trf_distilbertbaseuncased_lg"``.
    """

    def __init__(self, model="en_trf_distilbertbaseuncased_lg"):
        # Only the constructor parameter is stored here; the pipeline itself is
        # loaded lazily so that creating the transformer stays cheap.
        self.model = model

    def fit(self, X, y=None):
        """Return ``self`` unchanged; there is nothing to learn from ``X``."""
        return self

    def _get_nlp(self):
        """Return the spaCy pipeline for ``self.model``, loading it at most once per name."""
        cached = getattr(self, "_nlp_cache", None)
        # Reload when nothing is cached yet, or when ``model`` was changed
        # after the last load (e.g. through ``set_params``).
        if cached is None or cached[0] != self.model:
            cached = (self.model, spacy.load(self.model))
            self._nlp_cache = cached
        return cached[1]

    def __getstate__(self):
        """Return the pickling state without the cached spaCy pipeline."""
        state = super().__getstate__()
        # Build a new dict rather than popping, so the live instance keeps its cache.
        return {key: value for key, value in state.items() if key != "_nlp_cache"}

    def transform(self, X):
        """Return a 2-D array with one row per document in ``X``.

        Parameters
        ----------
        X : iterable of str
            Texts to embed; each one is run through the spaCy pipeline.

        Returns
        -------
        numpy.ndarray
            The documents' ``vector`` attributes, each reshaped to a single
            row and stacked along the first axis.

        Raises
        ------
        ValueError
            Raised by ``np.concatenate`` when ``X`` yields no documents.
        """
        nlp = self._get_nlp()
        return np.concatenate([nlp(doc).vector.reshape(1, -1) for doc in X])

In [6]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [7]:
# Vectorizer with the default model name; the spaCy pipeline is not loaded here, only when transform() is called.
bertvect = WordVectorTransformer()

In [8]:
# VADER analyzer whose polarity_scores() output provides the features for model_sr in combine_predict.
analyzer = SentimentIntensityAnalyzer()

Below we bring in the `combine_predict` function that we wrote when building the final composite predictive model; the text recognized from speech is fed into this function.

In [9]:
def combine_predict(sentence, *, weight_l=0.7, weight_s=0.3):
    """Blend the predictions of ``model_lr`` and ``model_sr`` for one piece of text.

    Parameters
    ----------
    sentence : str
        The text to score.
    weight_l : float, optional
        Factor applied to the ``model_lr`` prediction (default 0.7).
    weight_s : float, optional
        Factor applied to the ``model_sr`` prediction (default 0.3).

    Returns
    -------
    The weighted sum ``weight_l * preds_l + weight_s * preds_s`` of the two
    ``predict`` outputs. Callers index it as ``[0][0]``, so it is presumably
    an array of shape (1, 1) - confirm against the saved models.
    """
    # Wrap the text as a one-entry batch whose single entry is itself a
    # one-element list, the form the two feature builders below iterate over.
    sentence = [[sentence]]

    # Each transform() call embeds the one-element list; the result is reshaped
    # to (n, 768, 1). Presumably 768 is the vector width of the default spaCy
    # model - confirm if the model name is ever changed.
    s_input_model_l = np.array([bertvect.transform(i) for i in sentence]).reshape(-1,768,1)
    # NOTE(review): each `i` here is the list [text], not a bare string -
    # confirm that polarity_scores is meant to receive it in that form.
    s_input_model_s = pd.DataFrame([analyzer.polarity_scores(i) for i in sentence])

    preds_l = model_lr.predict(s_input_model_l)
    preds_s = model_sr.predict(s_input_model_s)

    return weight_l*preds_l + weight_s*preds_s

In [22]:
# Capture audio from the microphone; `r` and `audio` are reused by the recognition cell below.
r = sr.Recognizer()
with sr.Microphone() as source:
    print("Say something!")
    # Presumably blocks until a phrase has been captured - see the library docs for listen().
    audio = r.listen(source)

Say something!


In [23]:
# Transcribe the captured audio and, on success, print the transcript and the
# model's prediction for it.
try:
    # for testing purposes, we're just using the default API key
    # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
    # instead of `r.recognize_google(audio)`
    # Only the recognition call sits inside `try`: it is the one statement
    # expected to raise the two exceptions handled below.
    speech_output = r.recognize_google(audio)
except sr.UnknownValueError:
    print("Google Speech Recognition could not understand audio")
except sr.RequestError as e:
    print("Could not request results from Google Speech Recognition service; {0}".format(e))
else:
    # Success path: runs only when recognition returned a transcript.
    print(f'YOU SAID: {speech_output}')
    print(f'Model Prediction for Depression Probability: {combine_predict(speech_output)[0][0]}')

YOU SAID: testing
Model Prediction for Depression Probability: 0.3224950432777405
