In [1]:
from typing import Optional, List

from datetime import datetime, timedelta

from pydantic import BaseModel, Field
from langchain_google_vertexai import ChatVertexAI

from updater.collect import new_articles

In [2]:
# Schema handed to with_structured_output: the model answers with the IDs of
# the articles it picked. Documented with comments instead of a class
# docstring because pydantic copies a docstring into the generated JSON schema.
class ArticlePlan(BaseModel):
    ids: List[str] = Field(
        description="List of IDs of articles to use",
    )

class Section(BaseModel):
    """One section entry of a document ``Format``."""

    # Emitted by Format.to_xml inside the <TITLE> element.
    title: str
    # Optional free text emitted inside the section's <DESCRIPTION> element.
    description: Optional[str] = None
    # (sic) When False, Format.to_xml adds an <INSTRUCTION> telling the model
    # to treat this entry as a guideline for creating multiple sections.
    literaly: bool = True

class Format(BaseModel):
    """Specification of a document to generate, renderable as prompt markup.

    Attributes:
        topic: Text emitted in the ``<TOPIC>`` element.
        audience: Text emitted in the ``<AUDIENCE>`` element.
        description: Text emitted in the top-level ``<DESCRIPTION>`` element.
        sections: Section definitions, emitted in order inside ``<SECTIONS>``.
    """

    topic: str
    audience: str
    description: str
    sections: List[Section]

    def to_xml(self) -> str:
        """Render the specification as an XML-like ``<FORMAT>`` block.

        Every element is properly closed and placed on its own line. A section
        without a description gets no ``<DESCRIPTION>`` element (instead of the
        literal text ``None``). Sections whose ``literaly`` flag is False carry
        an extra ``<INSTRUCTION>`` element.

        Field values are interpolated verbatim; no XML escaping is applied.

        Returns:
            The markup as a single string, without a trailing newline.
        """
        lines = [
            '<FORMAT>',
            f'<TOPIC>{self.topic}</TOPIC>',
            f'<AUDIENCE>{self.audience}</AUDIENCE>',
            f'<DESCRIPTION>{self.description}</DESCRIPTION>',
            '<SECTIONS>',
        ]

        for section in self.sections:
            parts = [f'<TITLE>{section.title}</TITLE>']
            # Skip the element entirely rather than sending "None" to the model.
            if section.description is not None:
                parts.append(f'<DESCRIPTION>{section.description}</DESCRIPTION>')
            if not section.literaly:
                parts.append(
                    '<INSTRUCTION>Use this as a guidline to create multiple '
                    'sections as described</INSTRUCTION>'
                )
            lines.append('<SECTION>' + ''.join(parts) + '</SECTION>')

        lines.append('</SECTIONS>')
        lines.append('</FORMAT>')
        return '\n'.join(lines)

In [25]:
# Newsletter specification used to build the prompt: a fixed TL;DR section plus
# a non-literal template entry describing the per-article sections.
newsletter = Format(
    topic='Space Exploration',
    audience='technical, mostly sofware engineers and data scientists',
    description='Newsletter of latest developments in the field',
    sections=[
        Section(
            title='TL;DR',
            description='summary of main development',
        ),
        Section(
            title='Name of the article',
            description='one section for each article within selected topic, select 5 articles most relevant to topic',
            literaly=False,
        ),
    ],
)

In [16]:
# Feed URLs passed to new_articles: a regular RSS feed, a Substack feed and a
# YouTube channel feed.
DEFAULT_FEEDS = [
    # Regular RSS
    "https://huyenchip.com/feed.xml",
    # Substack
    "https://decodingml.substack.com/feed",
    # Youtube
    "https://www.youtube.com/feeds/videos.xml?channel_id=UCeMcDx6-rOq_RlKSPehk2tQ",
]

In [17]:
# Naive datetime for 1 January 1900, passed to new_articles as its second argument.
# NOTE(review): presumably chosen far in the past so no article is filtered out — confirm.
starting_point = datetime(1900, 1, 1)

In [18]:
# Call the project's new_articles helper with the feed list and the starting point.
# NOTE(review): presumably fetches the feeds over the network and returns objects
# exposing .id, .title and .to_xml() (as later cells use them) — confirm in updater.collect.
articles = new_articles(DEFAULT_FEEDS, starting_point)

In [26]:
# Concatenate every article's XML rendering inside a single <ARTICLES> element.
prompt_articles = f"<ARTICLES>{''.join(article.to_xml() for article in articles)}</ARTICLES>"

In [27]:
# Full prompt: task statement with the format spec, then the articles, then the
# response instruction, separated by blank lines.
# NOTE(review): the closing instruction asks for a list of sections, while the
# structured output schema (ArticlePlan) only carries article IDs — confirm intent.
prompt = (
    f'Create outline of document with following specifications: {newsletter.to_xml()}\n'
    '\n'
    f'{prompt_articles}\n'
    '\n'
    'Respond with list of sections, each section with plan how to create content for it.\n'
)

In [32]:
# Vertex AI chat model wrapped so its replies are returned as ArticlePlan objects.
llm = (
    ChatVertexAI(model='gemini-1.5-pro-002')
    .with_structured_output(ArticlePlan, method='json_mode')
)

In [33]:
# Send the prompt to the model; later cells read the selected IDs from response.ids.
response = llm.invoke(prompt)

In [36]:
# Titles of the articles whose ID (compared as a string) appears in the model's answer.
[
    article.title
    for article in articles
    if str(article.id) in response.ids
]

['Building A Generative AI Platform',
 'How NASA Will Land On Titan!',
 'The 2025 SpaceX Update Is Here!',
 'SpaceX Reveals New Starship Update!',
 "NASA Isn't Telling Us Something About This Asteroid"]

In [35]:
# Number of article IDs the model returned.
len(response.ids)

5