In [107]:
import os
import requests
import openai

zoom_transript_path = "85760048922_TRANSCRIPT.vtt"
asr_transcription_path = "asr_transcription.txt"

In [108]:
def ppt_matching_parse_transcript(transcript_file, num_participant, num_line_threshold = 3):
    """Parse a transcript file and extract conversation text from each participant.

    Args:
        transcript_file (str): Path to the transcript file.
        num_participant (int): Number of participants in the conversation.
        num_line_threshold (int, optional): Number of conversation lines from each participant
            used for further matching. Defaults to 3.

    Returns:
        str: A concatenated string of the extracted conversation text from each participant.

    Raises:
        FileNotFoundError: If the transcript file path is invalid.

    """

    def end_transcript_loop(ppt_length):
        return len(ppt_length) == 3 and all([v > 100 for v in ppt_length.values()])
    
    with open(transcript_file) as file:
        lines = file.readlines()

    ppt_text = {}
    ppt_length = {}

    for line in lines:
        if ": " not in line:
            continue
        line_split = line.split(": ")
        speaker, text = line_split[0], line_split[1]
        if speaker in ppt_length and ppt_length[speaker] > num_line_threshold:
            continue
        if speaker not in ppt_text:
            ppt_text[speaker] = text
        else:
            ppt_text[speaker] += text
        if speaker not in ppt_length:
            ppt_length[speaker] = 1
        else:
            ppt_length[speaker] += 1
        if end_transcript_loop(ppt_length):
            break
    result_text = ""
    for k, v in ppt_text.items():
        result_text += k + ": \n"
        result_text += v + "\n"
    return result_text

In [109]:
zoom_result_text = parse_transcript(zoom_transript_path, 3)
azure_result_text = parse_transcript(asr_transcription_path, 3)

The GPT prompt and content would be:

In [110]:
print("Given the following two transcript with different speaker representation, generate a JSON mapping, where each key is the participant in the first transcript, and the value is the corresponding participant name in the second transcript. Return the JSON only.")
print()
print("Transcript 1: \n" + azure_result_text + "\nTranscript 2: \n" + zoom_result_text)

Given the following two transcript with different speaker representation, generate a JSON mapping, where each key is the participant in the first transcript, and the value is the corresponding participant name in the second transcript. Return the JSON only.

Transcript 1: 
 Speaker 1: 
OK, let's start today's meeting since this seems like everyone's here. How was your week?
You have any plans?
Yeah, sounds fun. Mine was super busy too with all those cloud work which is still ongoing. I guess I'll be also be working on the poolside while I'm at Cancun. Anyway, let's start off with our first agenda item. Duke and Duchess of Sussex asked to vacate. You came home Frogmore College.
Prince Harry and Megan now reside in the United States, but had retained the early 19th century property as a UK base, gifted to the susexes by the late Queen Elizabeth. Third the second, the cottage is nestled in 1/4 of the Windsor's estate, West of London. They refurbish the property and moves in shortly before

In [111]:
API_KEY = os.environ['OPENAI_API_KEY']
url = 'https://api.openai.com/v1/chat/completions'
headers = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + API_KEY}

In [112]:
def ppt_matching_api_call(ASR_transcript, Zoom_transcript):
    
    # combine two transcript
    combined_text = "Transcript 1: \n" + ASR_transcript + "\nTranscript 2: \n" + Zoom_transcript
    
    prompt = "Given the following two transcript with different speaker representation, generate a JSON mapping, where each key is the participant in the first transcript, and the value is the corresponding participant name in the second transcript. Return the JSON only."
    data = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {"role": "system", "content": "Perform participant matching and return a JSON"},
            {"role": "assistant", "content": combined_text},
            {"role": "user", "content": prompt},
        ],
        "temperature": 0
    }

    response = requests.post(url, headers=headers, json=data)
    if response.status_code == 200:
        mapping = eval(ppt_matching_string['choices'][0]['message']['content'])
        if isinstance(mapping, dict):
            return mapping
        else:
            return ValueError("Error in GPT participant matching. Must return a dictionary mapping in its response.")
    else:
        raise ValueError("Fail to get GPT API response.")

In [113]:
ppt_matching_api_call(zoom_result_text, azure_result_text)

{'Speaker 1': 'Yooni Choi',
 'Speaker 2': 'Jiahe Feng',
 'Speaker 3': 'DAZHI PENG'}