In [1]:
import feedparser

def get_rss_updates(rss_url):
    feed = feedparser.parse(rss_url)
    updates = []
    for entry in feed.entries:
        updates.append({
            'title': entry.get('title', ''),
            'link': entry.get('link', ''),
            'published': entry.get('published', ''),
            'summary': entry.get('summary', '')
        })
    return updates

# Example usage:
# rss_url = 'https://rss.arxiv.org/rss/cs'
rss_url= 'https://www.science.org/action/showFeed?type=etoc&feed=rss&jc=science'
updates = get_rss_updates(rss_url)
print(updates[0].keys())

dict_keys(['title', 'link', 'published', 'summary'])


In [None]:
import os
import json
from typing import List, Tuple, Dict
from pydantic import BaseModel, Field, ValidationError
from langchain.output_parsers import PydanticOutputParser

from langchain_openai import ChatOpenAI
from langchain_core.messages import SystemMessage, HumanMessage

MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4.1")
TEMPERATURE = float(os.getenv("MODEL_TEMPERATURE", 0.5))
MAX_TOKENS = int(os.getenv("MODEL_MAX_TOKENS", 150))
OPENAI_API_BASE = os.getenv("OPENAI_API_BASE") or None

_llm_kwargs = {"model_name": MODEL_NAME, "temperature": TEMPERATURE}
if MAX_TOKENS:
    _llm_kwargs["max_tokens"] = MAX_TOKENS
if OPENAI_API_BASE:
    _llm_kwargs["openai_api_base"] = OPENAI_API_BASE

LLM = ChatOpenAI(**_llm_kwargs)


class SummarizationResult(BaseModel):
    summary: str = Field(..., description="Concise summary of the article")
    recipients: List[str] = Field(default_factory=list, description="Usernames to send the summary to")

parser = PydanticOutputParser(pydantic_object=SummarizationResult)

def summarize_articles(
    items: List[Tuple[str, str, str, str]], users: List[Dict[str, List[str]]]
) -> List[SummarizationResult]:
    """
    Summarize multiple articles (title, link, published, feed_summary) and
    select recipients based on user interests. Returns structured results.
    """

    # Format user interests
    user_info = "\n".join(
        f"- {u['username']}: {', '.join(u['interests'])}" for u in users
    )

    # Format articles
    article_lines = []
    for title, link, published, feed_summary in items:
        article_lines.append(
            f"Title: {title}\nLink: {link}\nPublished: {published}\nFeed Summary: {feed_summary}\n"
        )

    # Instructions for format
    system_prompt = (
        "You are an assistant that summarizes news articles and recommends them to users by matching topics of interest.\n"
    )

    full_prompt = (
        f"Users and their interests:\n{user_info}\n\n"
        f"Articles to summarize:\n{''.join(article_lines)}"
    )
    llm = LLM.with_structured_output(SummarizationResult)
    messages = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=full_prompt)
    ]

    response = llm.invoke(messages)

    # Try parsing using the parser
    try:
        
        return [response.__dict__]  # Because it's a single result
    except Exception as e:
        raise ValueError(f"Model returned invalid structured output:\n{response}\n\nError: {e}")



In [3]:
# Example usage

summary = summarize_articles(
    items=[
        ("Sample Title", "http://example.com/article", "2023-10-01", "This is a sample summary of the article."),
    ],
    users=[
        {"username": "user1", "interests": ["science", "technology"]},
        {"username": "user2", "interests": ["health", "environment"]},
    ]
)

In [6]:
summary[0].dict().get('summary', 'No summary provided')

/var/folders/zj/4134lvvs5wjcfkw_mzqz721jy4h4ck/T/ipykernel_78416/3905111262.py:1: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  summary[0].dict().get('summary', 'No summary provided')


'This is a sample summary of the article.'

{'model_name': 'gemma-3-27b-it-qat',
 'model': 'gemma-3-27b-it-qat',
 'stream': False,
 'temperature': 0.5,
 'max_completion_tokens': 150,
 '_type': 'openai-chat'}