In [33]:
# Function to add pauses into transcript

def stitch_up_transcript(segments, pause_threshold=3):
    """Join segment texts into one string, marking long pauses with ' ... '.

    Args:
        segments: Sequence of dicts, each providing 'start', 'end' and 'text'.
        pause_threshold: Gap (next segment's 'start' minus the previous
            segment's 'end') above which ' ... ' is inserted instead of a
            single space. The comparison is strict (``>``).

    Returns:
        The stitched transcript, or "" when ``segments`` is empty. Each
        segment's text is stripped of surrounding whitespace before joining.
    """
    if not segments:
        return ""

    pieces = [segments[0]['text'].strip()]
    # Walk consecutive (previous, next) pairs to measure the silence between them.
    for previous, following in zip(segments, segments[1:]):
        silence = following['start'] - previous['end']
        pieces.append(" ... " if silence > pause_threshold else " ")
        pieces.append(following['text'].strip())

    return "".join(pieces)

In [34]:
# Format a Whisper transcription result as a JSON-serializable dict

def format_transcript(patient_id, day_num, result, pause_threshold=0.5):
    """Build the transcript record for one patient recording.

    Args:
        patient_id: Stored unchanged under 'patient_id'.
        day_num: Stored unchanged under 'day_num'.
        result: Mapping with an optional 'segments' entry; each segment must
            provide 'start', 'end' and 'text'.
        pause_threshold: Forwarded to ``stitch_up_transcript`` to decide where
            pause markers are inserted.

    Returns:
        A dict with keys 'patient_id', 'day_num', 'duration_sec' (the 'end'
        of the last segment, or 0 when there are no segments),
        'transcript_text' and 'segments' (each reduced to start/end/text).
    """
    segments = result.get('segments', [])
    transcript_text = stitch_up_transcript(segments, pause_threshold)

    # The recording length is taken from where the final segment ends.
    if segments:
        duration = segments[-1]['end']
    else:
        duration = 0

    # Keep only the three fields of interest from every segment.
    trimmed_segments = []
    for seg in segments:
        trimmed_segments.append({
            'start': seg['start'],
            'end': seg['end'],
            'text': seg['text'],
        })

    record = {
        'patient_id': patient_id,
        'day_num': day_num,
        'duration_sec': duration,
        'transcript_text': transcript_text,
        'segments': trimmed_segments,
    }
    return record

In [36]:
# Whisper model to transcribe audio

import whisper

# Loaded Whisper models keyed by model name, so repeated transcriptions in the
# same kernel session do not reload the weights on every call.
_WHISPER_MODELS = {}


def transcribe(path, model_name='tiny'):
    """Transcribe an audio file with Whisper, requesting word timestamps.

    Args:
        path: Audio file location, passed straight to ``model.transcribe``.
        model_name: Name handed to ``whisper.load_model``. Defaults to
            'tiny', the model this function previously hard-coded.

    Returns:
        Whatever ``model.transcribe(path, word_timestamps=True)`` returns.
    """
    model = _WHISPER_MODELS.get(model_name)
    if model is None:
        # First request for this model name: load once and remember it.
        model = whisper.load_model(model_name)
        _WHISPER_MODELS[model_name] = model

    return model.transcribe(path, word_timestamps=True)

In [37]:
# Dump formatted json into a file

def dump(patient_id, day_num, result, path):
    """Format a transcription result and write it to ``path`` as JSON.

    Args:
        patient_id: Forwarded to ``format_transcript``.
        day_num: Forwarded to ``format_transcript``.
        result: Transcription result forwarded to ``format_transcript``.
        path: Destination file; opened in text write mode, so an existing
            file at that location is overwritten.
    """
    import json  # imported at function scope, as the original cell did

    payload = format_transcript(patient_id, day_num, result)
    with open(path, 'w') as out_file:
        json.dump(payload, out_file, indent=4)

In [38]:
# Quick function to transcribe a file into the nice format

import os

def prep_transcription(patient_id, day_num, path, save_path=None):
    """Transcribe an audio file and save the formatted transcript as JSON.

    Args:
        patient_id: Forwarded to ``dump``.
        day_num: Forwarded to ``dump``.
        path: Audio file handed to ``transcribe``.
        save_path: Where to write the JSON. When None, the first name in the
            sequence output.json, output1.json, output2.json, ... that does
            not already exist is used.
    """
    whisper_result = transcribe(path)

    if save_path is None:
        # Probe candidate names until one is free, starting with no suffix.
        suffix = 0
        save_path = "output.json"
        while os.path.exists(save_path):
            suffix += 1
            save_path = f"output{suffix}.json"

    dump(patient_id, day_num, whisper_result, save_path)

In [39]:
# TEST CASE

# Run the full pipeline on the sample recording; no save_path is given, so the
# JSON is written to the first unused output*.json name.
prep_transcription(patient_id='P001', day_num=8, path='test_audio_full.wav')

