## Set up dependencies

In [49]:
import os
import threading
import time

import azure.cognitiveservices.speech as speechsdk
import openai
from dotenv import load_dotenv

## Set up environment variables

In [50]:
# Load key/value pairs from a .env file into the process environment so the
# os.getenv(...) lookups in the following cells can read them.
load_dotenv()

True

In [51]:
# Configure the openai module from environment variables.
# A variable that is not set leaves the corresponding attribute as None.
openai.api_type = os.environ.get('OPENAI_API_TYPE')
openai.api_key = os.environ.get('OPENAI_API_KEY')
openai.api_base = os.environ.get('OPENAI_API_BASE')
openai.api_version = os.environ.get('OPENAI_API_VERSION')

In [52]:
# Azure Speech credentials read from the environment (None when unset);
# consumed by recognize_speech_from_file.
SPEECH_API_KEY = os.environ.get('SPEECH_API_KEY')
SPEECH_API_REGION = os.environ.get('SPEECH_API_REGION')

In [53]:
def recognize_speech_from_file(filename):
    """Transcribe an audio file using Azure Speech continuous recognition.

    Builds a recognizer from the module-level ``SPEECH_API_KEY`` and
    ``SPEECH_API_REGION`` values, feeds it ``filename`` as audio input, and
    blocks until the recognizer fires either ``session_stopped`` or
    ``canceled``. Progress is printed as events arrive.

    All per-call state is kept local to this function, so calling it does not
    create or overwrite any module-level variables and repeated calls do not
    interfere with each other.

    Args:
        filename: Path of the audio file to transcribe.

    Returns:
        list[str]: The text carried by each ``recognized`` event, in the order
        the events were received. Empty if nothing was recognized (for
        example when recognition is canceled before any result arrives).
    """
    speech_config = speechsdk.SpeechConfig(subscription=SPEECH_API_KEY, region=SPEECH_API_REGION)
    audio_config = speechsdk.audio.AudioConfig(filename=filename)
    # Create a speech recognizer that takes its audio from the file.
    # No recognition language is set here.
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

    # State shared with the callbacks below through the closure. An Event is
    # used for the "finished" signal so it can be set safely from whichever
    # thread the SDK uses to invoke callbacks.
    done = threading.Event()
    recognized_text_list = []

    def stop_cb(evt: speechsdk.SessionEventArgs):
        """Callback that signals the waiting caller to stop upon receiving `evt`."""
        print('CLOSING on {}'.format(evt))
        done.set()

    def recognize_cb(evt: speechsdk.SpeechRecognitionEventArgs):
        """Callback that stores and prints the text of a `recognized` event."""
        recognized_text_list.append(evt.result.text)
        print('RECOGNIZED: {}'.format(evt.result.text))

    # Connect callbacks to the events fired by the speech recognizer
    speech_recognizer.recognizing.connect(lambda evt: print('RECOGNIZING: {}'.format(evt)))
    speech_recognizer.recognized.connect(recognize_cb)
    speech_recognizer.session_started.connect(lambda evt: print('STT SESSION STARTED: {}'.format(evt)))
    speech_recognizer.session_stopped.connect(lambda evt: print('STT SESSION STOPPED {}'.format(evt)))
    speech_recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))
    # Stop continuous recognition on either session stopped or canceled events
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    # Start continuous speech recognition
    speech_recognizer.start_continuous_recognition()
    try:
        # Wait in short slices rather than indefinitely so an interrupt
        # (e.g. stopping the notebook cell) is noticed promptly.
        while not done.wait(timeout=0.5):
            pass
    finally:
        # Always shut the recognizer down, even if the wait was interrupted,
        # so it does not keep running in the background.
        speech_recognizer.stop_continuous_recognition()

    return recognized_text_list


## Transcribe Interview to Text

In [54]:
# Transcribe the sample recording. recognize_speech_from_file returns a list
# with one string per recognized result, so `text` is a list, not a single string.
text = recognize_speech_from_file("../data/sample.wav")
print(text)

RECOGNIZING: SpeechRecognitionEventArgs(session_id=8a2488e45e8b4a3ca1d42d32aaab594d, result=SpeechRecognitionResult(result_id=96d6ac2a50914553b955e66de581177b, text="hello mrs stevens my name is jane phillips i'm the personnel director i'm pleased to meet you please have a seat thank you", reason=ResultReason.RecognizingSpeech))


STT SESSION STARTED: SessionEventArgs(session_id=4097ecb73a00417b8de377161368419d)


RECOGNIZED: Hello, Mrs. Stevens. My name is Jane Phillips. I'm the Personnel director. I'm pleased to meet you. Please have a seat. Thank you.
RECOGNIZING: SpeechRecognitionEventArgs(session_id=8a2488e45e8b4a3ca1d42d32aaab594d, result=SpeechRecognitionResult(result_id=2cdeca3e32744e8ab3b3cd8500eef762, text="according", reason=ResultReason.RecognizingSpeech))
RECOGNIZING: SpeechRecognitionEventArgs(session_id=8a2488e45e8b4a3ca1d42d32aaab594d, result=SpeechRecognitionResult(result_id=8d970fc0f3154730a10b96e8d104ac0b, text="according to your", reason=ResultReason.RecognizingSpeech))
RECOGNIZING: SpeechRecognitionEventArgs(session_id=8a2488e45e8b4a3ca1d42d32aaab594d, result=SpeechRecognitionResult(result_id=de74a35945ca426bb773dbc0b39f5b44, text="according to your res", reason=ResultReason.RecognizingSpeech))
RECOGNIZING: SpeechRecognitionEventArgs(session_id=8a2488e45e8b4a3ca1d42d32aaab594d, result=SpeechRecognitionResult(result_id=2cb7c382d3b7414b9280e0392b7a11d7, text="according to your