In [1]:
from elasticsearch import Elasticsearch

# Shared client used by the helper functions in this notebook; it targets an
# Elasticsearch node on localhost, port 9200.
es = Elasticsearch("http://localhost:9200")

def search_videos(query: str, size: int = 5) -> list[dict]:
    """
    Search for videos whose titles or subtitles match a given query.
    
    Returns highlighted match information including video IDs and snippets.

    Args:
        query (str): The search query string to match against video titles and subtitles. Must be a non-empty string.
        size (int, optional): Maximum number of results to return. Must be a positive integer. Defaults to 5.

    Returns:
        list[dict]: One dict per hit, in the order Elasticsearch returned them.
            Each dict holds the highlighted fragments keyed by field name
            ('title' and/or 'subtitles', whichever produced a highlight) plus
            'video_id', the document's ``_id``. Matched terms are wrapped in ``*``.
    """
    body = {
        "size": size,
        "query": {
            "multi_match": {
                "query": query,
                # Title matches are weighted three times as much as subtitle matches.
                "fields": ["title^3", "subtitles"],
                "type": "best_fields",
                "analyzer": "english_with_stop_and_stem"
            }
        },
        "highlight": {
            "pre_tags": ["*"],
            "post_tags": ["*"],
            # At most one fragment of up to 150 characters per field.
            "fields": {
                "title": {
                    "fragment_size": 150,
                    "number_of_fragments": 1
                },
                "subtitles": {
                    "fragment_size": 150,
                    "number_of_fragments": 1
                }
            }
        }
    }

    response = es.search(index="podcasts", body=body)
    hits = response.body['hits']['hits']

    # A hit may come back without a 'highlight' section (no fragment could be
    # produced for it); fall back to an empty mapping instead of raising
    # KeyError. A fresh dict is built per hit so the response is not mutated.
    return [
        {**hit.get('highlight', {}), 'video_id': hit['_id']}
        for hit in hits
    ]

def get_subtitles_by_id(video_id: str) -> dict:
    """
    Look up a single video in the "podcasts" index and return its stored
    document (the ``_source`` of the Elasticsearch record), which is where the
    full subtitle text for that video lives.

    Args:
        video_id (str): YouTube id of the video whose subtitles should be fetched.
    """
    return es.get(index="podcasts", id=video_id)['_source']

In [2]:
# Call the search helper directly with a sample question (default result size)
# to inspect the highlighted snippets it returns.
search_videos('how do I get rich with AI?')

[{'subtitles': ['and we have a special guest\n1:34 today BOS BOS is an *AI* and data engineer\n1:38 he specializes in moving *AI* projects\n1:40 from the good enough for demo'],
  'title': ['Data Intensive *AI*'],
  'video_id': 'BP6w_vKySN0'},
 {'subtitles': ['first but you still\n42:37 want to want to leverage *Ai* and you want\n42:39 to you know in introduce *AI* into\n42:43 services that your bank offers um of\n42'],
  'title': ['Trends in *AI* Infrastructure'],
  'video_id': '1aMuynlLM3o'},
 {'subtitles': ["0:00 this week we'll talk about *AI* for\n0:03 digital Healthcare and we have a special\n0:05 guest today Maria and by the way should\n0:07 I say Maria Lisa"],
  'title': ['*AI* for Digital Health'],
  'video_id': 'whpkDmVVGUE'},
 {'subtitles': ["okay this\n1:15 week we'll talk about bringing together\n1:16 research and Industry and how\n1:18 explainable and interpretable machine\n1:20 learning and *AI*"],
  'title': ['Interpretable *AI* and ML'],
  'video_id': 'EQcY83VA0Us'},
 

## Simple research agent

In [3]:
from pydantic_ai import Agent
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

research_instructions = """
You're a helpful researcher agent.
""".strip()

research_agent = Agent(
    name='research_agent',
    instructions=research_instructions,
    model='openai:gpt-4o-mini',
    tools=[search_videos, get_subtitles_by_id]
)

result = await research_agent.run(
    user_prompt='how do I get rich with AI?'
)

print(result.output)

OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable