#### Develop an agent function to process a list of WhatsApp messages and extract events.

1. Load the WhatsApp messages from a file.
2. Process the messages to extract events.
3. Visualize the events in a table.

In [263]:
# Libraries & functions to pretty print text and JSON responses, just needed for the notebook
import textwrap # Text wrapping
from pprint import pprint # Pretty printing json

def pretty_print(response):
    """Pretty-print a response for display in the notebook.

    Args:
        response: One of
            * a ``str`` - re-flowed to 80 columns and printed;
            * a ``dict`` - pretty-printed as-is;
            * any object exposing ``model_dump()`` (e.g. a Pydantic model) -
              its dumped dict is pretty-printed.
    """
    if isinstance(response, str):
        # Plain text: wrap to the notebook's 80-column width.
        print(textwrap.fill(response, width=80))
        return

    # textwrap.fill only accepts strings, so a dict must not be sent through
    # it; anything that is neither str nor dict is expected to offer
    # model_dump().
    response_dict = response if isinstance(response, dict) else response.model_dump()
    pprint(response_dict, width=80)

In [264]:
# Load the dummy WhatsApp export into memory as a single string.
# The path is relative, so it is resolved against the kernel's working directory.

with open('dummy-data/dummy_whatsapp_3_events.txt', 'r', encoding='utf-8') as file:
    sample_message_history = file.read()


In [265]:
# Preview the raw export. pretty_print re-wraps strings to 80 columns, so
# consecutive messages run together in this view.
pretty_print(sample_message_history)

[22/11/24, 10:11:31] Sofia: We played on Wednesday at the usual spot. Good
memories! [22/11/24, 10:25:31] Maria: Yes, it was fun! 👍 [22/11/24, 10:36:31]
Ana: Yes, it was fun! 👍 [22/11/24, 10:43:31] Laura: Yes, it was fun! 👍
[22/11/24, 10:50:31] Carmen: Yes, it was fun! 👍 [22/11/24, 10:53:31] Julia:
Missed it 😢 [22/11/24, 11:07:31] Elena: Yes, it was fun! 👍 [22/11/24, 11:20:31]
Isabel: So sad I couldn't make it. [22/11/24, 11:26:31] Paula: Yes, it was fun!
👍 [22/11/24, 11:40:31] Victoria: Yes, it was fun! 👍 [01/01/25, 10:06:31]
Victoria: Who wants to play on Friday at 19:00 at the indoor court? [01/01/25,
10:18:31] Maria: Count me in! 👍 [01/01/25, 10:21:31] Sofia: Count me in! 👍
[01/01/25, 10:29:31] Ana: Count me in! 👍 [01/01/25, 10:39:31] Laura: Count me
in! 👍 [01/01/25, 10:47:31] Carmen: Count me in! 👍 [01/01/25, 10:54:31] Julia:
Sorry, Im busy that day. [01/01/25, 11:08:31] Elena: Count me in! 👍 [01/01/25,
11:30:31] Paula: Can't make it 😢 [01/01/25, 10:11:31] Paula: How about Sunday 

In [266]:
from datetime import datetime
import re

def format_message_history(sample_message_history):
    """Annotate every WhatsApp timestamp with its day of the week.

    Each ``[DD/MM/YY, HH:MM:SS]`` marker is rewritten as
    ``[DD/MM/YY, <Weekday> HH:MM:SS]``; all other text is left untouched.

    Args:
        sample_message_history: The chat export as a single string.

    Returns:
        The same text with the weekday name inserted into every timestamp
        that denotes a real date and time. Bracketed digits that merely look
        like a timestamp (e.g. ``[99/99/99, 10:00:00]``) are kept verbatim
        instead of aborting the whole conversion.
    """
    # Matches timestamps in the format [DD/MM/YY, HH:MM:SS]
    pattern = r'\[(\d{2}/\d{2}/\d{2}), (\d{2}:\d{2}:\d{2})\]'

    def replace_timestamp(match):
        """Return the matched timestamp with its weekday name inserted."""
        date_str, time_str = match.groups()
        try:
            message_datetime = datetime.strptime(
                f"{date_str} {time_str}", "%d/%m/%y %H:%M:%S"
            )
        except ValueError:
            # The digits fit the pattern but are not a valid date/time, so
            # this is not a real message timestamp: leave it exactly as found.
            return match.group(0)
        day_of_week = message_datetime.strftime("%A")
        return f"[{date_str}, {day_of_week} {time_str}]"

    # Replace all timestamps in the message history
    return re.sub(pattern, replace_timestamp, sample_message_history)

In [267]:
# Add the weekday to every timestamp, then print the result (rather than
# echoing the string) so each message is shown on its own line.
formatted_history = format_message_history(sample_message_history)
print(formatted_history)

[22/11/24, Friday 10:11:31] Sofia: We played on Wednesday at the usual spot. Good memories!
[22/11/24, Friday 10:25:31] Maria: Yes, it was fun! 👍
[22/11/24, Friday 10:36:31] Ana: Yes, it was fun! 👍
[22/11/24, Friday 10:43:31] Laura: Yes, it was fun! 👍
[22/11/24, Friday 10:50:31] Carmen: Yes, it was fun! 👍
[22/11/24, Friday 10:53:31] Julia: Missed it 😢
[22/11/24, Friday 11:07:31] Elena: Yes, it was fun! 👍
[22/11/24, Friday 11:20:31] Isabel: So sad I couldn't make it.
[22/11/24, Friday 11:26:31] Paula: Yes, it was fun! 👍
[22/11/24, Friday 11:40:31] Victoria: Yes, it was fun! 👍
[01/01/25, Wednesday 10:06:31] Victoria: Who wants to play on Friday at 19:00 at the indoor court?
[01/01/25, Wednesday 10:18:31] Maria: Count me in! 👍
[01/01/25, Wednesday 10:21:31] Sofia: Count me in! 👍
[01/01/25, Wednesday 10:29:31] Ana: Count me in! 👍
[01/01/25, Wednesday 10:39:31] Laura: Count me in! 👍
[01/01/25, Wednesday 10:47:31] Carmen: Count me in! 👍
[01/01/25, Wednesday 10:54:31] Julia: Sorry, Im busy th

In [268]:
from pydantic import BaseModel, Field
from openai import OpenAI
from dotenv import load_dotenv
from prompts import SYSTEM_PROMPT

# NOTE(review): presumably this loads the OpenAI credentials from a local .env
# file into the environment before the client is built - confirm.
load_dotenv()

# Client used by the extraction call further down in this cell.
client = OpenAI()

# Schema for one event extracted from the chat. Through CalendarEvents it is
# passed as the response_format of the model call below.
# NOTE(review): presumably the Field descriptions travel with the schema and
# act as guidance for the model - confirm. Comments are used here instead of a
# class docstring so the schema itself is not altered.
# NOTE: a later cell defines another class with this same name; once that cell
# runs, the name CalendarEvent refers to the later definition.
class CalendarEvent(BaseModel):
    name: str = Field(description="The name of the event")
    # Dates and times are plain strings; no parsing or validation happens here.
    event_date: str = Field(description="The date of the event. Not the date the message timestamp.")
    time: str = Field(description="The time of the event")
    participants: list[str] = Field(description="The participants of the event")
    number_of_participants: int = Field(description="The number of participants of the event")
    not_attending: list[str] = Field(description="The people who are not attending the event")
    didnt_confirm: list[str] = Field(description="The people who didn't confirm the event")
    location: str = Field(description="The location of the event")

# Wrapper model so a single response can carry any number of events.
class CalendarEvents(BaseModel):
    events: list[CalendarEvent]

# Ask the model to extract events from the weekday-annotated history, with the
# reply parsed into a CalendarEvents instance.
completion = client.beta.chat.completions.parse(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": formatted_history},
    ],
    response_format=CalendarEvents,
)

# Parsed result of the first choice.
# NOTE(review): this may be None when the reply cannot be parsed (e.g. a
# refusal) - confirm against the OpenAI SDK docs before relying on it.
response = completion.choices[0].message.parsed

In [269]:
# Inspect the whole assistant message, not only the parsed events.
pretty_print(completion.choices[0].message)

{'audio': None,
 'content': '{\n'
            '  "events": [\n'
            '    {\n'
            '      "name": "Game on Wednesday",\n'
            '      "event_date": "20/11/24",\n'
            '      "time": "N/A",\n'
            '      "participants": [\n'
            '        "Sofia",\n'
            '        "Maria",\n'
            '        "Ana",\n'
            '        "Laura",\n'
            '        "Carmen",\n'
            '        "Elena",\n'
            '        "Paula",\n'
            '        "Victoria"\n'
            '      ],\n'
            '      "number_of_participants": 8,\n'
            '      "not_attending": [\n'
            '        "Julia",\n'
            '        "Isabel"\n'
            '      ],\n'
            '      "didnt_confirm": [],\n'
            '      "location": "Usual spot"\n'
            '    },\n'
            '    {\n'
            '      "name": "Game on Friday",\n'
            '      "event_date": "03/01/25",\n'
            '      "time": "19:00"

In [270]:
# The extracted events as a list of CalendarEvent objects.
response.events

[CalendarEvent(name='Game on Wednesday', event_date='20/11/24', time='N/A', participants=['Sofia', 'Maria', 'Ana', 'Laura', 'Carmen', 'Elena', 'Paula', 'Victoria'], number_of_participants=8, not_attending=['Julia', 'Isabel'], didnt_confirm=[], location='Usual spot'),
 CalendarEvent(name='Game on Friday', event_date='03/01/25', time='19:00', participants=['Victoria', 'Maria', 'Sofia', 'Ana', 'Laura', 'Carmen', 'Elena'], number_of_participants=7, not_attending=['Julia', 'Paula'], didnt_confirm=['Isabel'], location='Indoor court'),
 CalendarEvent(name='Game on Sunday', event_date='05/01/25', time='18:00', participants=['Paula', 'Maria', 'Sofia', 'Ana', 'Carmen', 'Julia', 'Elena'], number_of_participants=7, not_attending=['Laura'], didnt_confirm=['Victoria', 'Isabel'], location='Indoor court')]

In [271]:
# Print each extracted event as a single-line JSON string.
for event in response.events:
    print(event.model_dump_json())

{"name":"Game on Wednesday","event_date":"20/11/24","time":"N/A","participants":["Sofia","Maria","Ana","Laura","Carmen","Elena","Paula","Victoria"],"number_of_participants":8,"not_attending":["Julia","Isabel"],"didnt_confirm":[],"location":"Usual spot"}
{"name":"Game on Friday","event_date":"03/01/25","time":"19:00","participants":["Victoria","Maria","Sofia","Ana","Laura","Carmen","Elena"],"number_of_participants":7,"not_attending":["Julia","Paula"],"didnt_confirm":["Isabel"],"location":"Indoor court"}
{"name":"Game on Sunday","event_date":"05/01/25","time":"18:00","participants":["Paula","Maria","Sofia","Ana","Carmen","Julia","Elena"],"number_of_participants":7,"not_attending":["Laura"],"didnt_confirm":["Victoria","Isabel"],"location":"Indoor court"}


In [272]:
import pandas as pd
import json

# One row per event. model_dump() already yields a plain dict of the event's
# fields, so there is no need to serialise to JSON and parse it back.
pd.DataFrame([event.model_dump() for event in response.events])


Unnamed: 0,name,event_date,time,participants,number_of_participants,not_attending,didnt_confirm,location
0,Game on Wednesday,20/11/24,,"[Sofia, Maria, Ana, Laura, Carmen, Elena, Paul...",8,"[Julia, Isabel]",[],Usual spot
1,Game on Friday,03/01/25,19:00,"[Victoria, Maria, Sofia, Ana, Laura, Carmen, E...",7,"[Julia, Paula]",[Isabel],Indoor court
2,Game on Sunday,05/01/25,18:00,"[Paula, Maria, Sofia, Ana, Carmen, Julia, Elena]",7,[Laura],"[Victoria, Isabel]",Indoor court


In [273]:
# process the messages to extract events
# Simplified event schema; process_messages below uses it through CalendarEvents.
# NOTE: this redefines CalendarEvent from the earlier cell with a different
# shape: the date field is named `date` instead of `event_date`, participants
# is a single string instead of a list, and not_attending / didnt_confirm are
# dropped. After this cell runs, the name refers to this definition.
class CalendarEvent(BaseModel):
    name: str
    date: str
    time: str
    participants: str
    location: str
    number_of_participants: int

# Container for the simplified events; rebinds the name CalendarEvents so that
# it wraps the CalendarEvent class defined just above.
class CalendarEvents(BaseModel):
    events: list[CalendarEvent]

def process_messages(message_history):
    """Extract calendar events from a WhatsApp message history.

    Sends ``message_history`` to the model together with ``SYSTEM_PROMPT`` and
    asks for the reply to be parsed as ``CalendarEvents``.

    Args:
        message_history: The chat text to analyse, as a single string.

    Returns:
        A pandas DataFrame with one row per extracted event and one column
        per event field.

    Raises:
        ValueError: If the reply carries no parsed events (for example
            because the model refused the request).
    """
    # Initialize the OpenAI client
    client = OpenAI()

    # Make the OpenAI API call to extract the events
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            # Use the caller's argument, not the notebook-level sample variable.
            {"role": "user", "content": message_history},
        ],
        response_format=CalendarEvents,
    )

    # Parse the response; fail with a clear message when nothing was parsed
    # instead of an AttributeError on None further down.
    message = completion.choices[0].message
    parsed = message.parsed
    if parsed is None:
        refusal = getattr(message, "refusal", None)
        raise ValueError(
            f"No events could be parsed from the model reply (refusal: {refusal!r})"
        )

    # model_dump() gives the field dict directly; no JSON round trip needed.
    return pd.DataFrame([event.model_dump() for event in parsed.events])


In [274]:
# Run the end-to-end helper, passing the raw (not weekday-annotated) export.
events_df = process_messages(sample_message_history)

# Show the resulting table.
events_df

Unnamed: 0,name,date,time,participants,location,number_of_participants
0,Game on Wednesday,20/11/24,,"Sofia, Maria, Ana, Laura, Carmen, Elena, Paula...",Usual spot,8
1,Game on Friday,03/01/25,19:00,"Victoria, Maria, Sofia, Ana, Laura, Carmen, Elena",Indoor court,7
2,Game on Sunday,05/01/25,18:00,"Paula, Maria, Sofia, Ana, Carmen, Julia, Elena",Indoor court,7


In [275]:
from datetime import datetime

# Scratch check: weekday name for the current date and time.
datetime.now().strftime("%A")

'Friday'