In [None]:
import os
from openai import OpenAI
import json 

In [None]:
# The API key is read from the environment so it never lands in the notebook.
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

# Path of the recording to transcribe; must be filled in before running this cell.
audio_file_path = ""

# Open the audio inside a context manager so the file handle is closed even
# when the transcription request raises.
with open(audio_file_path, "rb") as audio_file:
    # verbose_json + word granularity is what makes `transcript.words`
    # (per-word start/end timestamps) available to later cells.
    transcript = client.audio.transcriptions.create(
        file=audio_file,
        model="whisper-1",
        response_format="verbose_json",
        timestamp_granularities=["word"],
    )

transcript

In [None]:
def analyze_stuttering(transcription, model="gpt-4", temperature=0.3, max_tokens=500):
    """
    Analyze the transcription for stuttering patterns using OpenAI's GPT API.

    Args:
        transcription: Transcription result exposing a ``words`` iterable whose
            items have ``word``, ``start`` and ``end`` attributes.
        model: Chat model name forwarded to ``client.chat.completions.create``.
        temperature: Sampling temperature forwarded to the same call.
        max_tokens: Upper bound on the length of the reply. The reply is cut
            off at this limit, so long recordings may need a larger value to
            get complete JSON back.

    Returns:
        The text content of the first completion choice. The prompt asks the
        model for JSON, but the text is returned as-is and is not parsed or
        validated here.

    Raises:
        ValueError: If ``transcription`` carries no word-level data.
    """
    # Fail early with an actionable message instead of an opaque
    # TypeError/AttributeError when word timestamps were not requested.
    words = getattr(transcription, "words", None)
    if words is None:
        raise ValueError(
            "transcription has no word-level data; request it with "
            "response_format='verbose_json' and timestamp_granularities=['word']"
        )

    # Extract word-level data from the transcription object
    word_data = [
        {"word": entry.word, "start": entry.start, "end": entry.end}
        for entry in words
    ]

    # Serialize as real JSON (not a Python repr) so the payload matches the
    # JSON format the prompt asks the model to answer in; ensure_ascii=False
    # keeps non-ASCII words readable instead of \u-escaped.
    word_data_json = json.dumps(word_data, ensure_ascii=False)

    # Prepare the prompt with structured word data
    prompt = f"""As a speech analysis expert, analyze this transcription for stuttering patterns. 
    Each word has a start and end timestamp. Look for:
    1. Word repetitions
    2. Sound prolongations
    3. Blocks of silence or broken words
    4. Interjections or fillers when struggling with words
    5. Revision patterns or abandoned phrases
    
    Here is the transcription data:
    {word_data_json}
    
    Provide your analysis in JSON format with the following structure:
    {{
        "stutter_instances": [
            {{
                "timestamp": [start_time, end_time]
            }}
        ]
    }}"""

    messages = [
        {
            "role": "system",
            "content": "You are a speech analysis expert specializing in detecting speech disfluencies and stuttering patterns."
        },
        {
            "role": "user",
            "content": prompt
        }
    ]

    # `client` is the module-level OpenAI client created in an earlier cell.
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens
    )

    return response.choices[0].message.content

In [None]:
response_json = analyze_stuttering(transcript)

# Find where the JSON starts; anything before the first "{" is skipped.
start_index = response_json.find("{")
if start_index == -1:
    # Without this check, find() returning -1 would silently slice to the last
    # character and produce a misleading decode error.
    raise ValueError(f"No JSON object found in model response: {response_json!r}")

# raw_decode parses exactly one JSON value from the start of the string and
# reports where it ended, so text after the object (for example a closing
# Markdown fence) no longer makes parsing fail with "Extra data".
json_candidate = response_json[start_index:]
json_object, end_offset = json.JSONDecoder().raw_decode(json_candidate)

# Keep the exact JSON text that was parsed available for later cells.
json_string = json_candidate[:end_offset]

# Display the parsed result as the cell output.
json_object