In [1]:
import json
from typing import Union, List
from xml.sax.saxutils import escape

from pydantic import BaseModel, Field

from langchain_google_vertexai import VertexAI, ChatVertexAI

In [2]:
# A single informal note, as returned by the structured-output model.
# Documented with `#` comments rather than a docstring on purpose: pydantic
# copies a class docstring into the model's JSON-schema description, which
# would change the schema derived from this class.
class Note(BaseModel):
    date: str  # date the note was recorded; plain string, no format is enforced here
    note: str  # free-form text of the note

# Top-level container schema: a list of Note entries.
# Kept as `#` comments (no docstring) so the pydantic-generated schema
# description stays unchanged.
class Notes(BaseModel):
    notes: List[Note]  # the individual notes, in the order the model produced them

In [3]:
def object_to_xml(data: Union[dict, list, tuple, set, str, int, float, bool, None], root='object'):
    """Serialize a nested Python object into a compact XML string.

    The conversion is recursive:

    * ``dict``  -> one child element per key, named after the key.
    * ``list`` / ``tuple`` / ``set`` -> one ``<item>`` child per element
      (a ``set`` is emitted in its iteration order, which is not guaranteed
      to be stable between runs).
    * anything else -> ``str(data)`` as the element text, with ``&``, ``<``
      and ``>`` escaped so the result stays well-formed XML.

    Args:
        data: The value to serialize.
        root: Tag name wrapped around the serialized value. Tag names
            (``root`` and dict keys) are used verbatim and are not validated.

    Returns:
        The XML fragment ``<root>...</root>`` with no whitespace or
        indentation added.
    """
    if isinstance(data, dict):
        body = ''.join(object_to_xml(value, key) for key, value in data.items())
    elif isinstance(data, (list, tuple, set)):
        body = ''.join(object_to_xml(item, 'item') for item in data)
    else:
        # Escape markup characters; unescaped text such as "a < b" or "R&D"
        # would otherwise produce malformed XML.
        body = escape(str(data))

    return f'<{root}>{body}</{root}>'

In [4]:
# Reference task list that the generated notes are expected to resolve to.
# Key order (due, description, status) is kept because it determines the
# element order when the list is serialized to XML.
# NOTE(review): `status` looks like a percent-complete value (100 = done,
# 0 = not started) — confirm.
tasks = [
    dict(due='03/09/2024', description='submit summer work to physics', status=100),
    dict(due='05/09/2024', description='request move change to AP US History', status=0),
]

In [5]:
# Serialize the reference tasks to XML so they can be embedded in the prompt.
xml_tasks = object_to_xml(data=tasks, root='tasks')

In [6]:
# Gemini chat model configured to return its answer parsed into `Notes`,
# using the JSON-mode structured-output method.
pro = (
    ChatVertexAI(model='gemini-1.5-pro-002')
    .with_structured_output(Notes, method='json_mode')
)

In [7]:
# Persona of the note author; interpolated into the prompt.
USER = "high school student"

In [8]:
# Prompt sent to the model: asks for synthetic user notes that a task
# management app should be able to turn into the reference task list.
# Built from adjacent literals, one per prompt line; the resulting text is
# identical to the original triple-quoted f-string (including the trailing
# newline).
# NOTE(review): the wording contains typos ("You user is", missing articles);
# left untouched here because changing prompt text changes model input.
PROMPT = (
    "You are generating synthetic data for testing task management application. The application will take informal notes from the user and create list of tasks with due dates.\n"
    "You need to create list of notes with date on when the note was recorded and actual note text. Create at least 5 notes, some to create task and some to update status of the task. Each note should have at least two tasks\n"
    f"You user is {USER}.\n"
    "The application should convert those notes into list of tasks:\n"
    f"{xml_tasks}\n"
)

In [9]:
# Number of synthetic note sets to generate; each one is a separate model call
# and is written to its own file (tasks-1.json ... tasks-N.json).
N_SAMPLES = 10

for i in range(N_SAMPLES):
    response = pro.invoke(PROMPT)
    with open(f'tasks-{i+1}.json', 'w', encoding='utf-8') as f:
        # Dump the model as a plain dict. The previous `response.json()` already
        # returned a JSON *string*, so passing it to json.dump wrote a
        # double-encoded string literal instead of a JSON object.
        # NOTE(review): `model_dump` is the pydantic v2 API; on pydantic v1 use
        # `response.dict()` instead.
        json.dump(response.model_dump(mode='json'), f)