In [1]:
import os
import wave
import json
from vosk import Model, KaldiRecognizer
import spacy
from spacy.matcher import Matcher

# Verify that the unpacked Vosk model directory is available before loading it.
model_path = "model"
if not os.path.isdir(model_path):
    # Raise a regular exception instead of calling exit(): exit() is an
    # interactive-shell helper, whereas an exception stops this cell with a
    # clear message and traceback.
    raise FileNotFoundError(
        f"Please download the Vosk model and unpack it as '{model_path}' in the current folder."
    )

# Load the Vosk model once; transcribe_audio() reads this module-level object.
model = Model(model_path)

In [2]:
# Function to transcribe audio
def transcribe_audio(file_path):
    """Transcribe a WAV file to text using the module-level Vosk ``model``.

    The audio is streamed to a ``KaldiRecognizer`` in chunks of 4000 frames.
    Each time the recognizer reports a finished utterance its text is
    collected, and the trailing utterance is fetched with ``FinalResult()``,
    so the returned string covers the whole recording rather than only the
    last segment.

    Args:
        file_path: Path to a WAV file readable by ``wave.open``. It must be
            mono, 16-bit, uncompressed PCM.

    Returns:
        str: The recognized utterances joined by single spaces, or ``""``
        when nothing was recognized.

    Raises:
        ValueError: If the file is not mono 16-bit PCM.
    """
    with wave.open(file_path, "rb") as wf:
        # Validate the format up front: feeding multi-channel or non-16-bit
        # data to the recognizer yields meaningless text, not an error.
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
            raise ValueError("Audio file must be WAV format mono PCM.")

        recognizer = KaldiRecognizer(model, wf.getframerate())
        recognizer.SetWords(True)

        segments = []
        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            # True means an utterance was finalized; its text must be read
            # now with Result(), otherwise it is lost when decoding resumes.
            if recognizer.AcceptWaveform(data):
                segments.append(json.loads(recognizer.Result()).get("text", ""))

        # Flush whatever is still buffered after the last chunk.
        segments.append(json.loads(recognizer.FinalResult()).get("text", ""))

    # Drop empty segments (silence) so the result has no stray spaces.
    return " ".join(segment for segment in segments if segment)

# Cache of loaded spaCy pipelines, keyed by model name, so the (slow) model
# load happens once per kernel session instead of once per call.
_NLP_PIPELINES = {}


def _get_nlp(model_name="en_core_web_sm"):
    """Return the spaCy pipeline for ``model_name``, loading it on first use."""
    if model_name not in _NLP_PIPELINES:
        _NLP_PIPELINES[model_name] = spacy.load(model_name)
    return _NLP_PIPELINES[model_name]


# Function to extract dates and times using spaCy
def extract_date_time(text):
    """Extract date and time expressions from ``text`` using spaCy NER.

    Args:
        text: The text to analyze (for example, a transcript).

    Returns:
        tuple[list[str], list[str]]: ``(dates, times)`` where ``dates`` holds
        the text of every entity labeled ``DATE`` and ``times`` the text of
        every entity labeled ``TIME``, each in document order.
    """
    doc = _get_nlp()(text)

    dates = []
    times = []
    # Single pass over the entities; labels other than DATE/TIME are ignored.
    for ent in doc.ents:
        if ent.label_ == "DATE":
            dates.append(ent.text)
        elif ent.label_ == "TIME":
            times.append(ent.text)

    return dates, times

# Path to the audio file to transcribe (WAV format). This is a relative path,
# so it is resolved against the current working directory.
audio_file_path = "audio2.wav"

In [5]:
# Step 1: speech-to-text on the configured WAV file.
transcribed_text = transcribe_audio(audio_file_path)
print(f"Transcribed Text: {transcribed_text}")

# Step 2: pull DATE and TIME entities out of the transcript and report both
# lists (one per line).
dates, times = extract_date_time(transcribed_text)
print(
    f"Extracted Dates: {dates}",
    f"Extracted Times: {times}",
    sep="\n",
)

Transcribed Text: coombs components to whoosh
Extracted Dates: []
Extracted Times: []
