In [1]:
# Import required libraries and tools
import os
import sys
import json
from pprint import pprint

# Load environment variables
from dotenv import load_dotenv
load_dotenv()

# Add tools directory to path
sys.path.append('.')

# Import all our tools
from search_talks_by_filters import search_talks_by_filters
from search_talks_semantically import search_talks_semantically
from analyze_speaker_activity import analyze_speaker_activity
from get_talk_details import get_talk_details
from find_similar_content import find_similar_content
from analyze_topics_and_trends import analyze_topics_and_trends

print("✅ All tools imported successfully!")
print(f"🔑 ApertureDB Key configured: {'Yes' if os.getenv('APERTUREDB_KEY') else 'No'}")

# Helper function for pretty printing results
def print_results(result, max_results=5):
    """Pretty print tool results with truncation"""
    if isinstance(result, dict) and 'results' in result:
        print(f"Total found: {result.get('total_found', 'Unknown')}")
        if result.get('query_summary'):
            print(f"Query: {result['query_summary']}")
        if result.get('sort_info'):
            print(f"Sorting: {result['sort_info']}")
        
        results = result['results'][:max_results]
        for i, talk in enumerate(results, 1):
            print(f"\n{i}. {talk.get('title', 'No Title')}")
            print(f"   Speaker: {talk.get('speaker', 'Unknown')}")
            print(f"   Company: {talk.get('company', 'Unknown')}")
            print(f"   Views: {talk.get('views', 0):,}")
            if talk.get('published_date'):
                print(f"   Date: {talk['published_date']}")
    else:
        pprint(result)

✅ All tools imported successfully!
🔑 ApertureDB Key configured: Yes


## Tool 4: `get_talk_details` - Comprehensive Talk Deep Dive Testing

This tool provides comprehensive details about specific MLOps talks including metadata, transcript chunks, speaker information, and optionally related talks. It handles both title-based and ID-based lookups with flexible transcript filtering by time ranges.

### Test 1: Basic Talk Information Retrieval
**Query**: "Get complete details about the talk 'LLMs, from Playgrounds to Production-ready Pipelines'"

This performs a basic lookup by exact title to retrieve all metadata including speaker info, abstract, view counts, and technical details. No transcript or related talks included for focused metadata analysis.

In [2]:
# Test 1: Basic Talk Information Retrieval
result1 = get_talk_details.invoke({
    "talk_title": "LLMs, from Playgrounds to Production-ready Pipelines"
})

print("=== BASIC TALK DETAILS ===")
pprint(result1)

=== BASIC TALK DETAILS ===
{'related_talks': [],
 'success': True,
 'summary': "Retrieved details for talk 'LLMs, from Playgrounds to "
            "Production-ready Pipelines'",
 'talk_info': {'abstract': 'Despite the onset of commercially viable '
                           'open-source Large Langauge Models, companies are '
                           'struggling to leverage cutting-edge models like '
                           'Llama2 and Mistral 7B for production-ready '
                           'applications. Creating a simple demo page on a '
                           'personal laptop and training, fine-tuning, and '
                           'serving multi-billion parameter LLMs on HPC-scale '
                           'infrastructure - with proprietary enterprise data '
                           '- involves an entirely different engineering '
                           'challenge. In this session, Intae, who co-founded '
                           'and now leads product d

### Test 2: Talk with Full Transcript Access
**Query**: "Get the multimodal agents talk with complete transcript for content analysis"

This retrieves a talk with full transcript chunks included, useful for analyzing the actual spoken content, finding specific discussions, and understanding the flow of the presentation.

In [3]:
# Test 2: Talk with Full Transcript Access
result2 = get_talk_details.invoke({
    "talk_title": "Making ChatGPT funny with Prompt Optimization",
    "include_transcript": True,
    "max_chunks": 15
})

print("=== TALK WITH TRANSCRIPT ===")
pprint(result2)

=== TALK WITH TRANSCRIPT ===
{'related_talks': [],
 'success': True,
 'summary': "Retrieved details for talk 'Making ChatGPT funny with Prompt "
            "Optimization' | 15 transcript chunks",
 'talk_info': {'abstract': 'A recent study found ChatGPT repeated the same 25 '
                           'jokes 90% of the time. As is often the case with '
                           'popular narratives about the limits of AI, ChatGPT '
                           'is capable of so much more... you just have to '
                           'know how to ask! Using principles of prompt '
                           'engineering, I try to get ChatGPT to make you '
                           'laugh, while arming you with techniques for '
                           'overcoming similar supposed limitations, when '
                           'working with AI.',
               'category': 'Introduction to MLOps and GenAI',
               'company': 'Saxifrage',
               'duration': 'Unknown',


### Test 3: Time-Specific Transcript Analysis
**Query**: "Show me what was discussed in the first 5 minutes (0-300 seconds) of the RAG deployment talk"

This demonstrates time-filtered transcript access to analyze specific sections of a talk, useful for understanding introductions, key concepts, or specific time ranges mentioned by users.

In [4]:
# Test 3: Time-Specific Transcript Analysis
result3 = get_talk_details.invoke({
    "talk_title": "Deploying and Evaluating RAG pipelines with Lightning Studios",
    "include_transcript": True,
    "time_start": 0,
    "time_end": 300,
    "max_chunks": 10
})

print("=== FIRST 5 MINUTES ANALYSIS ===")
pprint(result3)

=== FIRST 5 MINUTES ANALYSIS ===
{'related_talks': [],
 'success': True,
 'summary': "Retrieved details for talk 'Deploying and Evaluating RAG "
            "pipelines with Lightning Studios' | 10 transcript chunks | with "
            'time filtering',
 'talk_info': {'abstract': 'Learn how to use Lightning Studios to quickly '
                           'deploy AI agents and accelerate your evaluation of '
                           'RAG pipelines.',
               'category': 'Deployment and integration',
               'company': 'Lightning AI',
               'duration': 'Unknown',
               'event': 'TMLS 2024',
               'industries': 'Automotive, Banking & Financial Services, '
                             'Computer Software, Environmental Services, Food '
                             '& Beverages, Hospital & Health Care, Information '
                             'Technology & Service, Insurance',
               'keywords': None,
               'published_date': '2024

### Test 4: Talk Details with Related Content Discovery
**Query**: "Get details about the Memory Optimizations for Machine Learning talk and find similar presentations for comprehensive research"

This combines talk details with semantic similarity search to find related talks, useful for comprehensive topic research and discovering related content from different speakers.

In [5]:
# Test 4: Talk Details with Related Content Discovery
result4 = get_talk_details.invoke({
    "talk_title": "Memory Optimizations for Machine Learning",
    "include_related": True,
    "related_count": 6
})

print("=== TALK WITH RELATED CONTENT ===")
pprint(result4)

=== TALK WITH RELATED CONTENT ===
{'related_talks': [{'category': 'Performance optimization and efficiency',
                    'similarity_score': 0.303,
                    'speaker': 'Sri  Raghu Malireddi',
                    'title': 'On-Device ML for LLMs: Post-training '
                             'Optimization Techniques with T5 and Beyond',
                    'views': 252,
                    'youtube_url': 'https://www.youtube.com/watch?v=QRPvr1td4s0'},
                   {'category': 'Deployment and integration',
                    'similarity_score': 0.329,
                    'speaker': 'Jaeman An',
                    'title': 'Mastering Enterprise-Grade LLM Deployment: '
                             'Overcoming Production Challenges',
                    'views': 11,
                    'youtube_url': 'https://www.youtube.com/watch?v=dRgwzUk1s-g'},
                   {'category': 'Deployment and integration',
                    'similarity_score': 0.335,
          

### Test 5: Middle Section Transcript Analysis
**Query**: "Analyze the middle section (10-20 minutes) of the ML production pipeline talk to understand the core technical content"

This demonstrates advanced time filtering to focus on the main technical content of a talk, skipping introductions and conclusions to analyze the core material.

In [7]:
# Test 5: Middle Section Transcript Analysis
result5 = get_talk_details.invoke({
    "talk_title": "From ML Repository to ML Production Pipeline",
    "include_transcript": True,
    "time_start": 600,  # 10 minutes
    "time_end": 1200,   # 20 minutes
    "max_chunks": 12
})

print("=== MIDDLE SECTION ANALYSIS (10-20 mins) ===")
pprint(result5)

=== MIDDLE SECTION ANALYSIS (10-20 mins) ===
{'related_talks': [],
 'success': True,
 'summary': "Retrieved details for talk 'From ML Repository to ML Production "
            "Pipeline' | 10 transcript chunks | with time filtering",
 'talk_info': {'abstract': 'In the pRED MLOps team, we collaborate closely '
                           'with research scientists to transition their '
                           'machine learning models into a production '
                           'environment seamlessly. Through our efforts, we '
                           'have developed a robust framework that '
                           'standardises and scales this process effectively. '
                           'In this talk, we will provide an in-depth look at '
                           'our framework, the tools we leverage, and the '
                           'challenges we overcome in this journey.',
               'category': 'Deployment and integration',
               'company': 'Roche

### Test 6: Comprehensive Analysis with All Features
**Query**: "Get everything about the optimization deployment talk - metadata, transcript, and related talks for complete analysis"

This demonstrates the tool's full capabilities by retrieving complete talk information, transcript chunks, and related content in a single comprehensive query.

In [6]:
# Test 6: Comprehensive Analysis with All Features
result6 = get_talk_details.invoke({
    "talk_title": "Evaluation Techniques for Large Language Models",
    "include_transcript": True,
    "include_related": True,
    "max_chunks": 8,
    "related_count": 4
})

print("=== COMPREHENSIVE ANALYSIS ===")
pprint(result6)

=== COMPREHENSIVE ANALYSIS ===
{'related_talks': [{'category': 'Model dev, training, arch.',
                    'similarity_score': 0.37,
                    'speaker': 'Aniket Maurya',
                    'title': 'Finetuning a large language model on a custom '
                             'dataset',
                    'views': 1110,
                    'youtube_url': 'https://www.youtube.com/watch?v=OsD0KkyYXoY'},
                   {'category': 'Model dev, training, arch.',
                    'similarity_score': 0.372,
                    'speaker': 'Rohit Saha, Kyryl Truskovskyi, Angeline '
                               'Yasodhara, Benjamin Ye',
                    'title': 'Leveraging Large Language Models to build '
                             'Enterprise AI',
                    'views': 145,
                    'youtube_url': 'https://www.youtube.com/watch?v=BTYL-kLmhFE'},
                   {'category': 'Model dev, training, arch.',
                    'similarity_score'

### Test 9: Late-Stage Content Analysis
**Query**: "Analyze the final 10 minutes of a talk to understand conclusions, Q&A, and key takeaways"

This focuses on the end of a presentation to capture conclusions, final insights, and Q&A sessions which often contain valuable practical insights and audience interactions.

In [9]:
# Test 9: Late-Stage Content Analysis  
result9 = get_talk_details.invoke({
    "talk_title": "LLMs, Big Data, and Audio: Breaching an Untapped Gold Mine",
    "include_transcript": True,
    "time_start": 1800,  # Last 10 minutes (assuming ~30 min talk)
    "max_chunks": 8
})

print("=== FINAL 10 MINUTES ANALYSIS ===")
pprint(result9)

=== FINAL 10 MINUTES ANALYSIS ===
{'related_talks': [],
 'success': True,
 'summary': "Retrieved details for talk 'LLMs, Big Data, and Audio: Breaching "
            "an Untapped Gold Mine' | 0 transcript chunks | with time "
            'filtering',
 'talk_info': {'abstract': 'Large language models like those in the GPT and '
                           'Llama series are primarily trained on massive '
                           'amounts of *text* data. However, the vast majority '
                           "of language and communication doesn't take place "
                           'over text, but rather through voice. Cues in vocal '
                           'tone carry information that the plaintext cannot '
                           "convey—think about the last time you've witnessed "
                           'or experienced a miscommunication over '
                           'text/email/Slack. Thus, in this talk, I argue that '
                           'training language

### Test 10: Brief Transcript Sample for Quick Overview
**Query**: "Get just a few transcript chunks from the Kubernetes talk for a quick content preview"

This demonstrates using minimal transcript chunks for quick content sampling, useful when you want a brief overview without extensive transcript analysis.

In [10]:
# Test 10: Brief Transcript Sample for Quick Overview
result10 = get_talk_details.invoke({
    "talk_title": "Evaluation Techniques for Large Language Models",
    "include_transcript": True,
    "max_chunks": 3
})

print("=== BRIEF TRANSCRIPT SAMPLE ===")
pprint(result10)

=== BRIEF TRANSCRIPT SAMPLE ===
{'related_talks': [],
 'success': True,
 'summary': "Retrieved details for talk 'Evaluation Techniques for Large "
            "Language Models' | 3 transcript chunks",
 'talk_info': {'abstract': 'Large language models (LLMs) represent an exciting '
                           'trend in AI, with many new commercial and '
                           'open-source models released recently. However, '
                           'selecting the right LLM for your needs has become '
                           'increasingly complex. This tutorial provides data '
                           'scientists and machine learning engineers with '
                           'practical tools and best practices for evaluating '
                           'and choosing LLMs.\n'
                           '\n'
                           'The tutorial will cover the existing research on '
                           'the capabilities of LLMs versus small traditional '
           

### Test 11: Error Handling - Non-Existent Talk
**Query**: "Test error handling by requesting details for a talk that doesn't exist"

This tests the tool's error handling capabilities when a talk title is not found, ensuring graceful failure modes and informative error messages.

In [18]:
# Test 11: Error Handling - Non-Existent Talk
result11 = get_talk_details.invoke({
    "talk_title": "This Talk Does Not Exist In The Database"
})

print("=== ERROR HANDLING TEST ===")
pprint(result11)

=== ERROR HANDLING TEST ===
{'error': '0',
 'related_talks': [],
 'success': False,
 'summary': 'Failed to retrieve talk details',
 'talk_info': {},
 'transcript_chunks': [],
 'transcript_stats': {}}


### Test 12: Input Validation - Missing Parameters
**Query**: "Test input validation by calling the tool without required parameters"

This tests the tool's input validation to ensure proper error messages when neither talk_title nor talk_id is provided, validating the parameter requirements.

In [19]:
# Test 12: Input Validation - Missing Parameters
print("\n=== Test 12: Input Validation - Missing Parameters ===")
try:
    result = get_talk_details.invoke({})
    print(f"Result: {result}")
except Exception as e:
    print(f"Expected error: {e}")
    print("✅ Tool properly validated required parameters")


=== Test 12: Input Validation - Missing Parameters ===
Result: {'talk_info': {}, 'transcript_chunks': [], 'related_talks': [], 'summary': 'Error: Either talk_title or talk_id must be provided', 'transcript_stats': {}, 'success': False, 'error': "Missing required parameter: either 'talk_title' or 'talk_id' must be specified"}
