### Testing an agent that downloads news from the GNews API and summarizes it for user convenience
First, we install the OpenAI libraries.

In [12]:
#!python -m pip install -U "openai==2.2.0" "openai-agents==0.1.0" python-dotenv


Then we import the libraries required for our agent and load the configuration from environment variables (stored in the `.env` file in the same directory as this notebook).

In [13]:
import os, asyncio

#libraries required for the agent
from openai import OpenAI
from agents.agent import Agent
from agents.run import Runner
from agents.tool import function_tool

#libraries required for the functions
import json
import random
import time
from collections import Counter
from dataclasses import dataclass
from typing import Any, Optional
from datetime import datetime

import requests

#loading environment variables
from dotenv import load_dotenv
load_dotenv()

True

To work with the news, we create a class that holds all the fields required for news processing.

In [14]:
@dataclass
class NewsArticle:
    """A news article together with the metadata added while processing it.

    The first seven fields are required at construction time. The remaining
    optional fields (summaries, English translations and scores) default to
    ``None`` and are filled in later by the processing functions.

    Raises:
        ValueError: If ``title``, ``content`` or ``url`` is missing, empty or
            whitespace-only.
    """

    title: str
    content: str
    url: str
    source: str
    published_at: str
    language: str
    category: str
    summary: Optional[str] = None
    english_summary: Optional[str] = None
    english_title: Optional[str] = None
    relevance_score: Optional[float] = None
    quality_score: Optional[float] = None

    def __post_init__(self):
        """Validate the mandatory text fields after initialization."""
        required_fields = (
            ("Title", self.title),
            ("Content", self.content),
            ("URL", self.url),
        )
        for label, value in required_fields:
            # `value or ""` turns a missing (None) field into the same
            # ValueError as an empty one instead of an AttributeError.
            if not (value or "").strip():
                raise ValueError(f"{label} cannot be empty")

    def to_dict(self) -> dict:
        """Convert article to dictionary for serialization."""
        return {
            'title': self.title,
            'content': self.content,
            'url': self.url,
            'source': self.source,
            'published_at': self.published_at,
            'language': self.language,
            'category': self.category,
            'summary': self.summary,
            'english_summary': self.english_summary,
            'english_title': self.english_title,
            'relevance_score': self.relevance_score,
            'quality_score': self.quality_score
        }

    @classmethod
    def from_dict(cls, data: dict) -> 'NewsArticle':
        """Create article from a dictionary whose keys match the field names.

        Raises:
            TypeError: If ``data`` contains a key that is not a field or lacks
                a required field (raised by the dataclass constructor).
            ValueError: If the resulting article fails validation.
        """
        return cls(**data)

    def get_display_title(self) -> str:
        """Get the best available title (English preferred)."""
        return self.english_title or self.title

    def get_display_summary(self) -> str:
        """Get the best available summary (English preferred).

        Falls back to the first 200 characters of the content when no summary
        exists; an ellipsis is appended only if the content was actually cut.
        """
        if self.english_summary:
            return self.english_summary
        if self.summary:
            return self.summary

        preview_length = 200
        preview = self.content[:preview_length]
        if len(self.content) > preview_length:
            preview += "..."
        return preview


Then, we create a config class for an agent.

The agent will be able to query GNews for articles in multiple languages and from different countries in order to keep its sources diverse. The class stores the API keys required for fetching and processing the news, as well as the diversity settings, such as the countries of origin and the languages to query.

In [None]:
@dataclass
class AgentConfig:
    """Configuration for the news agent.

    Holds the API keys, the limits used when collecting articles, the
    country/language lists used for source diversity, and the model settings.
    """

    # API Configuration
    openai_api_key: str
    gnews_api_key: str

    # Processing Configuration
    max_articles_per_source: int = 2
    target_total_articles: int = 15
    min_sources: int = 5

    # Diversity Configuration
    # None means "use the defaults assigned in __post_init__".
    countries: Optional[list[str]] = None
    languages: Optional[list[str]] = None
    academic_sources_enabled: bool = True

    # AI Configuration
    model: str = "gpt-4o-mini"
    max_tokens: int = 200
    temperature: float = 0.3

    # Rate Limiting
    request_delay: float = 1.0

    def __post_init__(self):
        """Set default values if not provided."""
        # Fresh list objects are created per instance, so instances never
        # share (and accidentally co-mutate) the default lists.
        if self.countries is None:
            self.countries = ['us', 'gb', 'ca', 'ru', 'in', 'de', 'fr', 'jp']
        if self.languages is None:
            self.languages = ['en', 'ru']

    def to_dict(self) -> dict[str, Any]:
        """Convert config to dictionary.

        The result contains exactly the dataclass fields, so it can be passed
        back to ``from_dict``. Note that it includes both API keys in plain
        text; avoid printing or persisting it unredacted.
        """
        # The former 'academic_delay' entry was removed: the class has no such
        # attribute, so reading it raised AttributeError on every call.
        return {
            'openai_api_key': self.openai_api_key,
            'gnews_api_key': self.gnews_api_key,
            'max_articles_per_source': self.max_articles_per_source,
            'target_total_articles': self.target_total_articles,
            'min_sources': self.min_sources,
            'countries': self.countries,
            'languages': self.languages,
            'academic_sources_enabled': self.academic_sources_enabled,
            'model': self.model,
            'max_tokens': self.max_tokens,
            'temperature': self.temperature,
            'request_delay': self.request_delay
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> 'AgentConfig':
        """Create config from a dictionary whose keys match the field names.

        Raises:
            TypeError: If ``data`` has an unknown key or lacks an API key
                (raised by the dataclass constructor).
        """
        return cls(**data)

We then populate the config with the API keys read from the environment.

In [16]:
def create_config_from_env() -> Optional['AgentConfig']:
    """Create configuration from environment variables.

    Reads ``OPENAI_API_KEY`` and ``GNEWS_API_KEY``; every other setting keeps
    its ``AgentConfig`` default. A missing variable is replaced by an empty
    string and reported with a warning, because the resulting config would
    otherwise only fail later with an authentication error from the API.

    Returns:
        The populated ``AgentConfig``, or ``None`` if constructing it failed.
    """
    try:
        env_config = AgentConfig(
            openai_api_key=os.getenv('OPENAI_API_KEY', ''),
            gnews_api_key=os.getenv('GNEWS_API_KEY', '')
        )
    except Exception as e:
        print(f"Error creating config from environment: {e}")
        return None

    # Report the variable names only; never print the key values.
    missing = [
        variable
        for variable, value in (
            ('OPENAI_API_KEY', env_config.openai_api_key),
            ('GNEWS_API_KEY', env_config.gnews_api_key),
        )
        if not value.strip()
    ]
    if missing:
        print(f"Warning: missing environment variable(s): {', '.join(missing)}")

    return env_config


# Module-level settings read by the tool functions defined below.
config = create_config_from_env()
gnews_base_url = 'https://gnews.io/api/v4'


The agent needs access to several functions that fetch data from GNews using the agent config and that process the news (for example, filtering by relevance or ensuring source diversity). The full list of fetching and filtering functions available to the agent is:

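- fetch_news_from_gnews. Fetches news articles for a query from the GNews API for a given country and language.
- fetch_diverse_news_sources. Queries GNews for the configured country/language combinations, limiting how many articles are taken from any single source, until the target number of articles is collected.
- filter_articles_by_relevance. Calculates a relevance score from how many of the topic's words appear in the title and content (title matches weigh more) and keeps the articles whose score reaches a minimum threshold.
- ensure_source_diversity. Uses the agent config's per-source article limit: when more articles remain after relevance filtering than the target total, it keeps the most relevant ones while taking no more than the configured number from each source.
- get_source_statistics. Reports counts of the collected articles per source, language and category.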


In [17]:
def _redact_secret(message: str, secret: str) -> str:
    """Mask `secret` inside `message` so an API key is not echoed to output."""
    return message.replace(secret, '***') if secret else message


def _fetch_gnews_articles(
    query: str,
    max_articles: int = 10,
    country: str = 'us',
    language: str = 'en'
) -> list[dict[str, Any]]:
    """Plain-Python implementation of the GNews search request.

    Kept separate from the tool wrapper because ``@function_tool`` replaces
    the decorated function with a tool object that other Python code cannot
    call directly.

    Returns:
        The ``articles`` list from the JSON response, or ``[]`` on any error
        (the error is printed, not raised).
    """
    try:
        url = f"{gnews_base_url}/search"
        params = {
            'token': config.gnews_api_key,
            'q': query,
            'country': country,
            'lang': language,
            'max': max_articles,
            'expand': 'content'
        }

        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()

        data = response.json()
        # `or []` also covers an explicit null value for 'articles'.
        articles = data.get('articles') or []

        print(f"Fetched {len(articles)} articles for query '{query}' from {country} ({language})")
        return articles

    except Exception as e:
        # HTTP error messages can contain the request URL, which carries the
        # API token as a query parameter; mask it before printing.
        safe_message = _redact_secret(str(e), config.gnews_api_key)
        print(f"Error fetching news for query '{query}': {safe_message}")
        return []


@function_tool
def fetch_news_from_gnews(
     
    query: str, 
    max_articles: int = 10, 
    country: str = 'us', 
    language: str = 'en'
) -> list[dict[str, Any]]:
    """Fetch news articles from GNews API.

    Args:
        query: Search query.
        max_articles: Maximum number of articles to request.
        country: Country code to search in, e.g. 'us'.
        language: Language code of the articles, e.g. 'en'.

    Returns:
        The raw article dictionaries from the API, or an empty list on error.
    """
    return _fetch_gnews_articles(query, max_articles, country, language)


@function_tool
def fetch_diverse_news_sources( query: str) -> list[NewsArticle]:
    """Fetch news from diverse sources to ensure variety.

    Queries every configured country/language combination in random order
    until ``config.target_total_articles`` articles are collected, taking at
    most ``config.max_articles_per_source`` articles from any one source.

    Args:
        query: Search query.

    Returns:
        The collected articles; invalid or duplicate articles are skipped.
    """
    articles_per_request = 3
    all_articles = []
    source_counts = Counter()
    seen_urls = set()

    # Create combinations of countries and languages for diversity
    combinations = [
        (country, language)
        for country in config.countries
        for language in config.languages
    ]
    random.shuffle(combinations)  # Randomize order

    for country, language in combinations:
        if len(all_articles) >= config.target_total_articles:
            break

        try:
            # Call the plain helper: the decorated fetch_news_from_gnews is a
            # tool object, not a callable function.
            articles = _fetch_gnews_articles(
                query,
                max_articles=articles_per_request,
                country=country,
                language=language
            )

            for article in articles:
                # Stop mid-batch so the target is not overshot.
                if len(all_articles) >= config.target_total_articles:
                    break

                source_name = (article.get('source') or {}).get('name') or 'Unknown'
                article_url = article.get('url') or ''

                # The same article can come back for several combinations.
                if article_url in seen_urls:
                    continue

                # Check if we already have enough from this source
                if source_counts[source_name] >= config.max_articles_per_source:
                    continue

                # Create NewsArticle object; one invalid article must not
                # discard the rest of the batch.
                try:
                    news_article = NewsArticle(
                        title=article.get('title') or '',
                        content=article.get('content') or article.get('description') or '',
                        url=article_url,
                        source=source_name,
                        published_at=article.get('publishedAt') or '',
                        language=language,
                        category='news'
                    )
                except ValueError as e:
                    print(f"Skipping invalid article from {source_name}: {e}")
                    continue

                all_articles.append(news_article)
                seen_urls.add(article_url)
                source_counts[source_name] += 1

            # Add delay to avoid rate limiting
            time.sleep(config.request_delay)

        except Exception as e:
            print(f"Error fetching from {country} ({language}): {e}")
            continue

    print(f"\nTotal articles collected: {len(all_articles)}")
    print(f"Unique sources: {len(source_counts)}")
    print(f"Source distribution: {dict(source_counts)}")

    return all_articles

@function_tool
def filter_articles_by_relevance(
     
    articles: list[NewsArticle], 
    topic: str,
    min_relevance_score: float = 0.4
) -> list[NewsArticle]:
    """Filter articles based on relevance to the topic.

    The score is the share of topic words found in the article, weighting a
    title hit at 0.7 and a content hit at 0.3. Matching is by case-insensitive
    substring, so a short topic word also matches inside longer words.

    Args:
        articles: Articles to score.
        topic: Topic whose whitespace-separated words are searched for.
        min_relevance_score: Minimum score an article needs to be kept.

    Returns:
        The articles scoring at least ``min_relevance_score``, in their
        original order, with ``relevance_score`` set on each. If the topic
        contains no words, all articles are returned unscored.
    """
    # Split once; the topic does not change between articles.
    topic_words = topic.lower().split()

    # Without topic words the score would divide by zero; there is nothing to
    # filter on, so pass the articles through unchanged.
    if not topic_words:
        print("Topic is empty; skipping relevance filtering")
        return list(articles)

    relevant_articles = []

    for article in articles:
        # Simple relevance check based on title and content
        title_lower = article.title.lower()
        content_lower = article.content.lower()

        # Calculate simple relevance score
        title_score = sum(1 for word in topic_words if word in title_lower)
        content_score = sum(1 for word in topic_words if word in content_lower)

        relevance_score = (title_score * 0.7 + content_score * 0.3) / len(topic_words)

        if relevance_score >= min_relevance_score:
            article.relevance_score = relevance_score
            relevant_articles.append(article)

    print(f"Filtered {len(articles)} articles to {len(relevant_articles)} relevant articles")
    return relevant_articles

@function_tool
def ensure_source_diversity( articles: list[NewsArticle]) -> list[NewsArticle]:
    """Trim an oversized article collection while capping articles per source.

    When the collection does not exceed ``config.target_total_articles`` it is
    returned untouched. Otherwise articles are visited from highest to lowest
    relevance score (a missing score counts as 0) and accepted as long as
    their source is below ``config.max_articles_per_source``, until the target
    size is reached.
    """
    target_size = config.target_total_articles
    if not len(articles) > target_size:
        return articles

    # Rank a copy so the caller's list keeps its order.
    ranked = list(articles)
    ranked.sort(key=lambda item: item.relevance_score or 0, reverse=True)

    accepted_per_source = Counter()
    diverse_articles = []

    for candidate in ranked:
        origin = candidate.source

        # Accept only while this source is still under its cap.
        if accepted_per_source[origin] < config.max_articles_per_source:
            diverse_articles.append(candidate)
            accepted_per_source[origin] += 1

        if len(diverse_articles) >= config.target_total_articles:
            break

    print(f"Ensured diversity: {len(diverse_articles)} articles from {len(accepted_per_source)} sources")
    return diverse_articles

@function_tool
def get_source_statistics( articles: list[NewsArticle]) -> dict[str, Any]:
    """Get statistics about article sources.

    Args:
        articles: Articles to summarize.

    Returns:
        An empty dict for an empty input; otherwise the total count, the
        number of unique sources, per-source/language/category counts, and
        the split between academic and news articles.
    """
    if not articles:
        return {}

    source_counts = Counter(article.source for article in articles)
    language_counts = Counter(article.language for article in articles)
    category_counts = Counter(article.category for article in articles)

    # NewsArticle defines no `is_academic` attribute, so reading it directly
    # raised AttributeError. Articles without the flag count as regular news.
    academic_count = sum(
        1 for article in articles if getattr(article, 'is_academic', False)
    )

    return {
        'total_articles': len(articles),
        'unique_sources': len(source_counts),
        'source_distribution': dict(source_counts),
        'language_distribution': dict(language_counts),
        'category_distribution': dict(category_counts),
        'academic_articles': academic_count,
        'news_articles': len(articles) - academic_count
    }

The functions that process the news content (summarization, translation, relevance evaluation) need an OpenAI client, which we create first. We then define the processing tools. Some of them (such as the relevance evaluator) are LLM-based alternatives to the word-count approach used in the block above. Whoever writes the final agent instructions needs to balance filtering quality against token usage when choosing between those functions.

In [18]:
# Shared OpenAI client used by all AI-backed helpers below.
client = OpenAI(api_key=config.openai_api_key)


def _chat_completion_text(
    system_prompt: str,
    user_prompt: str,
    max_tokens: int,
    temperature: float
) -> str:
    """Send one system+user exchange to ``config.model`` and return the reply text.

    Raises:
        ValueError: If the model returns no text content.
        Exception: Whatever the OpenAI client raises for a failed request.
    """
    response = client.chat.completions.create(
        model=config.model,
        messages=[
            {
                "role": "system",
                "content": system_prompt
            },
            {
                "role": "user",
                "content": user_prompt
            }
        ],
        max_tokens=max_tokens,
        temperature=temperature
    )

    reply = response.choices[0].message.content
    if not reply:
        # Raise so every caller takes its own documented fallback path.
        raise ValueError("Model returned an empty response")
    return reply.strip()


def _parse_json_reply(reply: str) -> Optional[dict]:
    """Parse a model reply as a JSON object; return None if that is not possible.

    A reply wrapped in a Markdown code fence (``` or ```json) is unwrapped
    first, since a fenced reply would otherwise fail to parse.
    """
    cleaned = reply.strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line, then everything from the closing fence.
        cleaned = cleaned.split("\n", 1)[1] if "\n" in cleaned else ""
        cleaned = cleaned.rsplit("```", 1)[0]

    try:
        parsed = json.loads(cleaned)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None

    return parsed if isinstance(parsed, dict) else None


def _summarize_text(text: str, language: str = 'en', topic: Optional[str] = None) -> str:
    """Plain-Python summarizer shared by the tool and the batch processor.

    Returns a fixed notice for text shorter than 50 non-blank characters, and
    the first 200 characters of the text if the model call fails.
    """
    try:
        if not text or len(text.strip()) < 50:
            return "Insufficient content for summary."

        # Truncate if too long
        max_length = 3000
        if len(text) > max_length:
            text = text[:max_length] + "..."

        system_prompt = f"You are a news summarizer. Create a concise, informative summary in {language}. Focus on key facts and main points. Keep it under 150 words."

        if topic:
            system_prompt += f" Focus on aspects relevant to '{topic}'."

        return _chat_completion_text(
            system_prompt,
            f"Summarize this content:\n\n{text}",
            max_tokens=config.max_tokens,
            temperature=config.temperature
        )

    except Exception as e:
        print(f"Error generating summary: {e}")
        return text[:200] + "..." if len(text) > 200 else text


def _translate_text(text: str, topic: Optional[str] = None) -> str:
    """Plain-Python translator shared by the tool and the batch processor.

    Text shorter than 10 non-blank characters is returned unchanged, as is the
    original text if the model call fails.
    """
    try:
        if not text or len(text.strip()) < 10:
            return text

        system_prompt = "You are a professional translator. Translate the following text to English. Maintain the original meaning and tone. If the text is already in English, return it as is."

        if topic:
            system_prompt += f" Pay special attention to terminology related to '{topic}'."

        return _chat_completion_text(
            system_prompt,
            f"Translate to english: {text}",
            max_tokens=300,
            temperature=0.1
        )

    except Exception as e:
        print(f"Error translating to english: {e}")
        return text


def _title_keywords(title: str, limit: int = 3, min_length: int = 4) -> set[str]:
    """Return the first `limit` title words of at least `min_length` characters.

    Words are lowercased and stripped of surrounding punctuation. Short words
    are skipped so that articles and prepositions do not act as keywords.
    """
    words = (word.strip('.,:;!?"\'()[]-') for word in title.lower().split())
    significant = [word for word in words if len(word) >= min_length]
    return set(significant[:limit])


def _title_words(title: str) -> set[str]:
    """Return all lowercased, punctuation-stripped words of a title."""
    return {word.strip('.,:;!?"\'()[]-') for word in title.lower().split()}


@function_tool
def generate_summary( text: str, language: str = 'en', topic: Optional[str] = None) -> str:
    """Generate summary using OpenAI.

    Args:
        text: Content to summarize.
        language: Language the summary should be written in.
        topic: Optional topic the summary should focus on.

    Returns:
        The summary, a notice if the text is too short, or the start of the
        text if the model call fails.
    """
    return _summarize_text(text, language, topic)


@function_tool
def translate_to_english( text: str, topic: Optional[str] = None) -> str:
    """Translate text to english using OpenAI.

    Args:
        text: Text to translate.
        topic: Optional topic whose terminology deserves special attention.

    Returns:
        The English text, or the original text if it is very short or the
        model call fails.
    """
    return _translate_text(text, topic)


@function_tool
def evaluate_article_relevance( article: NewsArticle, topic: str) -> dict[str, Any]:
    """Evaluate article relevance using AI.

    Args:
        article: Article to evaluate; its title and first 500 content
            characters are sent to the model.
        topic: Topic to evaluate against.

    Returns:
        The parsed model evaluation. If the reply is not a JSON object, or the
        call fails, fixed fallback values that mark the article as relevant
        are returned instead.
    """
    try:
        evaluation_text = _chat_completion_text(
            """You are a news relevance evaluator. Determine if an article is relevant to the given topic. Consider:
                    1. Direct relevance to the topic
                    2. Quality of content
                    3. Recency
                    4. Source credibility
                    
                    Return a JSON response with:
                    - relevance_score: float (0-1)
                    - is_relevant: boolean
                    - reasoning: string
                    - quality_score: float (0-1)""",
            f"Topic: '{topic}'\nArticle: '{article.title}'\nContent: '{article.content[:500]}...'\n\nIs this article relevant?",
            max_tokens=200,
            temperature=0.2
        )

        # Try to parse JSON response
        evaluation = _parse_json_reply(evaluation_text)
        if evaluation is None:
            # Fallback evaluation; say so, because these are not model scores.
            print("Could not parse relevance evaluation as JSON; using fallback values")
            evaluation = {
                "relevance_score": 0.7,
                "is_relevant": True,
                "reasoning": "Article appears relevant to topic",
                "quality_score": 0.6
            }

        return evaluation

    except Exception as e:
        print(f"Error in article evaluation: {e}")
        return {
            "relevance_score": 0.5, 
            "is_relevant": True, 
            "reasoning": f"Error in evaluation: {e}",
            "quality_score": 0.5
        }


@function_tool
def analyze_topic( topic: str) -> dict[str, Any]:
    """Analyze topic and provide processing recommendations.

    Args:
        topic: News topic to analyze.

    Returns:
        The parsed model analysis; a fixed default strategy if the reply is
        not a JSON object; or ``{"error": ...}`` if the call fails.
    """
    try:
        analysis_text = _chat_completion_text(
            """You are an expert news analyst. Analyze the given topic and determine the best strategy for news processing. Consider:
                    1. What countries/regions would have the most relevant news?
                    2. What languages should be prioritized?
                    3. Are there academic sources that would be relevant?
                    4. What's the expected diversity of sources?
                    
                    Return a JSON response with your analysis and recommendations.""",
            f"Analyze this news topic and recommend a processing strategy: '{topic}'",
            max_tokens=500,
            temperature=0.3
        )

        # Try to parse JSON response
        analysis = _parse_json_reply(analysis_text)
        if analysis is None:
            # Fallback analysis
            print("Could not parse topic analysis as JSON; using fallback strategy")
            analysis = {
                "topic": topic,
                "recommended_countries": ["us", "gb", "in", "ru", "cn"],
                "recommended_languages": ["en", "ru"],
                "academic_relevance": "medium",
                "expected_diversity": "high",
                "strategy": "comprehensive"
            }

        return analysis

    except Exception as e:
        print(f"Error in topic analysis: {e}")
        return {"error": str(e)}


@function_tool
def merge_related_articles( articles: list[NewsArticle]) -> list[NewsArticle]:
    """Merge articles whose titles share leading keywords.

    This is a keyword heuristic and makes no model call. An article joins the
    first group whose opening article has a significant leading title word
    (see ``_title_keywords``) that also occurs as a whole word in its title.
    Each multi-article group becomes a copy of its first article whose content
    lists the other articles' titles and sources.

    Args:
        articles: Articles to group and merge.

    Returns:
        One article per group, in order of first appearance. The input
        articles are not modified. On error the input is returned unchanged.
    """
    if len(articles) <= 1:
        return articles

    try:
        # Each entry pairs the group's keywords with its member articles.
        article_groups = []

        for article in articles:
            words_in_title = _title_words(article.title)
            for group_keywords, members in article_groups:
                # Whole-word comparison; substring matching made a keyword
                # such as "a" match almost every title.
                if group_keywords & words_in_title:
                    members.append(article)
                    break
            else:
                article_groups.append((_title_keywords(article.title), [article]))

        merged_articles = []

        for _, members in article_groups:
            if len(members) == 1:
                merged_articles.append(members[0])
                continue

            # Merge multiple articles
            primary_article = members[0]
            supplementary_articles = members[1:]

            # Create merged content
            merged_content = f"{primary_article.content}\n\n"
            merged_content += "Additional sources:\n"
            for supplementary in supplementary_articles:
                merged_content += f"- {supplementary.title} ({supplementary.source})\n"

            # Build a copy so the caller's article keeps its original content.
            merged_articles.append(
                NewsArticle.from_dict({**primary_article.to_dict(), 'content': merged_content})
            )

        return merged_articles

    except Exception as e:
        print(f"Error merging articles: {e}")
        return articles


@function_tool
def process_articles_batch(
     
    articles: list[NewsArticle], 
    topic: str
) -> list[NewsArticle]:
    """Process a batch of articles with AI tools.

    Fills in ``summary``, ``english_title`` and ``english_summary`` wherever
    they are still empty. Articles whose language is 'en' reuse their title
    and summary as the English versions instead of being sent for translation.

    Args:
        articles: Articles to process; they are updated in place.
        topic: Topic used to focus the summaries and translations.

    Returns:
        All input articles in order; an article whose processing raised is
        returned as it was at the point of failure.
    """
    processed_articles = []

    for article in articles:
        try:
            # Generate summary. The plain helpers are called here because the
            # decorated tool objects are not callable from Python code.
            if not article.summary:
                article.summary = _summarize_text(
                    article.content,
                    article.language,
                    topic
                )

            already_english = (article.language or '').lower() == 'en'

            # Translate to english
            if not article.english_title:
                article.english_title = (
                    article.title if already_english
                    else _translate_text(article.title, topic)
                )
            if not article.english_summary:
                article.english_summary = (
                    article.summary if already_english
                    else _translate_text(article.summary, topic)
                )

        except Exception as e:
            print(f"Error processing article '{article.title}': {e}")

        # Keep the article whether or not processing succeeded.
        processed_articles.append(article)

    return processed_articles



Finally, we can create an agent, give it an instruction on news fetching and processing and list the tools the agent can use. 

In [None]:
# The tool names listed in the instructions must match the registered function
# names below, since those are the names the model has to call.
agent = Agent(
    name="News agent",
    instructions="""You are an intelligent news processing agent. Your job is to help users get comprehensive news coverage on any topic.

You have access to the following tools:
- analyze_topic: Analyze a topic and determine processing strategy
- fetch_news_from_gnews: Get raw news articles for one query, country and language
- fetch_diverse_news_sources: Get news from diverse sources
- filter_articles_by_relevance: Keep articles whose title and content match the topic words
- ensure_source_diversity: Limit how many articles come from each source
- get_source_statistics: Count articles per source, language and category
- generate_summary: Generate AI summaries
- translate_to_english: Translate content to English
- evaluate_article_relevance: Check if content is relevant
- merge_related_articles: Combine related articles
- process_articles_batch: Summarize and translate a list of articles in one call

Your goal is to provide comprehensive, diverse, and high-quality news coverage. Use your tools intelligently to:
1. Understand what the user wants
2. Gather information from multiple sources
3. Ensure diversity in sources and perspectives
4. Process and enhance the content
5. Present results in a useful format

Be strategic about which tools to use and in what order. You can call multiple tools, combine results, and make decisions based on what you find.""",
    tools=[fetch_news_from_gnews, fetch_diverse_news_sources, filter_articles_by_relevance, ensure_source_diversity, get_source_statistics, generate_summary, translate_to_english, evaluate_article_relevance,analyze_topic, merge_related_articles, process_articles_batch],
)

#Requesting the agent to give a comprehensive coverage on some topic:
async def main():
    result = await Runner.run(agent, input="Recent Meta glasses release")
    print(result.final_output)
   

await main()
