In [1]:
#Task 11: Retrieval , Generation()
# Build a system which generates the MOM from the transcript of the meeting.
import re
from transformers import pipeline

# ===== Step 1: Preprocess Transcript =====
def clean_transcript(transcript):
    transcript = re.sub(r'\[\d{2}:\d{2}(:\d{2})?\]', '', transcript)
    transcript = re.sub(r'\(\d{2}:\d{2}(:\d{2})?\)', '', transcript)
    transcript = re.sub(r'\b(um|uh|ah|like)\b', '', transcript, flags=re.I)
    transcript = re.sub(r'\s+', ' ', transcript).strip()
    return transcript

# ===== Step 2: Extract Action Items =====
def extract_action_items(transcript):
    lines = transcript.split('. ')
    action_items = []
    for line in lines:
        if any(word in line.lower() for word in ['will', 'to do', 'assign', 'responsible']):
            action_items.append(line.strip())
    return action_items

# ===== Step 3: Generate MOM =====
def generate_mom(transcript, attendees, date):
    cleaned_text = clean_transcript(transcript)
    
    # Summarize discussion
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    summary = summarizer(cleaned_text, max_length=200, min_length=50, do_sample=False)[0]['summary_text']
    
    # Extract action items
    action_items = extract_action_items(cleaned_text)
    
    # Create MOM text
    mom_text = f"""
    Minutes of Meeting
    Date: {date}
    Attendees: {', '.join(attendees)}

    Discussion Summary:
    {summary}

    Action Items:
    """
    for i, item in enumerate(action_items, 1):
        mom_text += f"{i}. {item}\n"
    
    return mom_text

# ===== Step 4: Example Usage =====
if __name__ == "__main__":
    transcript = """
    Rahul: We need to finalize the project plan by next Monday. 
    Ravi: I will take care of the budget report. 
    Ram: Let's review the marketing strategy tomorrow. 
    """
    attendees = ["Rahul", "Ravi", "Ram"]
    date = "2026-01-30"

    mom = generate_mom(transcript, attendees, date)
    print(mom)


Device set to use cpu
Your max_length is set to 200, but your input_length is only 38. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=19)



    Minutes of Meeting
    Date: 2026-01-30
    Attendees: Rahul, Ravi, Ram

    Discussion Summary:
    Rahul: We need to finalize the project plan by next Monday. Ravi: I will take care of the budget report. Ram: Let's review the marketing strategy tomorrow. Rahul: We must finalize it by Monday.

    Action Items:
    1. Ravi: I will take care of the budget report

