In [1]:

# Import required libraries and tools
import os
import sys
import json
from pprint import pprint

# Load environment variables
from dotenv import load_dotenv
load_dotenv()

# Add tools directory to path
sys.path.append('.')

# Import all our tools
from search_talks_by_filters import search_talks_by_filters
from search_talks_semantically import search_talks_semantically
from analyze_speaker_activity import analyze_speaker_activity
from get_talk_details import get_talk_details
from find_similar_content import find_similar_content
from analyze_topics_and_trends import analyze_topics_and_trends

# Report setup status: the imports above succeeded, and whether APERTUREDB_KEY is set in the environment
print(
    "✅ All tools imported successfully!",
    "🔑 ApertureDB Key configured: " + ("Yes" if os.getenv("APERTUREDB_KEY") else "No"),
    sep="\n",
)

# Helper functions for pretty printing results
def _format_views(raw_views):
    """Render a view count with thousands separators, tolerating odd values.

    ``None`` is rendered as ``0`` (the same text shown when the key is absent).
    Values that reject the ``,`` format spec (e.g. strings) fall back to ``str()``.
    """
    if raw_views is None:
        return "0"
    try:
        return f"{raw_views:,}"
    except (TypeError, ValueError):
        return str(raw_views)


def print_results(result, max_results=5):
    """Pretty print tool results with truncation.

    Args:
        result: Value returned by a tool. A dict holding a list of talks under
            ``'results'`` or ``'similar_talks'`` is printed talk by talk;
            anything else is handed to ``pprint`` unchanged.
        max_results: Maximum number of talks to print.
    """
    talks_key = None
    if isinstance(result, dict):
        # 'results' keeps priority; 'similar_talks' is the key seen in the
        # find_similar_content outputs recorded in this notebook.
        talks_key = next(
            (key for key in ('results', 'similar_talks') if key in result),
            None,
        )
    if talks_key is None:
        pprint(result)
        return

    print(f"Total found: {result.get('total_found', 'Unknown')}")
    if result.get('query_summary'):
        print(f"Query: {result['query_summary']}")
    if result.get('sort_info'):
        print(f"Sorting: {result['sort_info']}")

    # A None value under the key is treated as "no talks" instead of crashing on the slice.
    talks = (result[talks_key] or [])[:max_results]
    for i, talk in enumerate(talks, 1):
        print(f"\n{i}. {talk.get('title', 'No Title')}")
        print(f"   Speaker: {talk.get('speaker', 'Unknown')}")
        print(f"   Company: {talk.get('company', 'Unknown')}")
        print(f"   Views: {_format_views(talk.get('views', 0))}")
        if talk.get('similarity_score') is not None:
            print(f"   Similarity: {talk['similarity_score']}")
        if talk.get('published_date'):
            print(f"   Date: {talk['published_date']}")

✅ All tools imported successfully!
🔑 ApertureDB Key configured: Yes


# Test Scenarios for find_similar_content Tool

This notebook tests the `find_similar_content` tool with various realistic scenarios that demonstrate different ways users might want to find similar or related content in the MLOps events database.

## Tool Overview
The `find_similar_content` tool can:
- Find content similar to a specific talk (by title or ID)  
- Find content based on a natural language query
- Support different similarity types (content, speaker, topic, all)
- Apply various filters (date, category, event, speaker exclusions)
- Control similarity thresholds and result limits

### Test 1: Find Similar Content Using Natural Language Query - AI Agents

**Scenario**: User wants to find talks about AI agents and autonomous systems  
**Query**: "Find content about AI agents, autonomous systems, and agent-based architectures"

This test uses a natural language query to find semantically similar content about AI agents. The tool will use embedding similarity to find talks that discuss similar concepts even if they don't use the exact same terminology.

In [2]:
# Test 1: free-text reference query about AI agents, capped at five results
result1 = find_similar_content.invoke(dict(reference_query="AI agents", limit=5))
print("=== AI Agents Content Similarity ===", result1, sep="\n")

=== AI Agents Content Similarity ===
{'similar_talks': [{'talk_id': 'f9b8beb1-45bb-560c-b75a-745ad5759cac', 'title': 'Understanding\xa0 Where Generative AI fits into Business', 'speaker': 'Miguel Mendez, Anand Nimkar, Preeti Shivpuri', 'youtube_url': 'https://www.youtube.com/watch?v=1l2W8YlvVM4', 'category': 'Business and stakeholder alignment', 'views': 299, 'similarity_score': 0.446, 'similarity_type': 'content', 'similarity_reason': 'Similar content themes (score: 0.446)', 'matching_content': 'Understanding\xa0 Where Generative AI fits into Business | Additional Questions: How dangerous is Generative AI? | Understand where Generative AI fits into business | Business and stakeholder alignment |...'}, {'talk_id': 'dacfab95-57e8-5086-bc22-74b86cf3fbb3', 'title': 'GenAI Investing in 2024', 'speaker': 'Margo Wu', 'youtube_url': 'https://www.youtube.com/watch?v=QBNX72rckbc', 'category': 'Future trends', 'views': 20, 'similarity_score': 0.444, 'similarity_type': 'content', 'similarity_reas

### Test 2: Find Similar Content to a Specific Talk

**Scenario**: User knows about a specific talk and wants to find similar presentations  
**Reference Talk**: "LLMs, from Playgrounds to Production-ready Pipelines"  
**Query**: "Find talks similar to the LLMs production pipelines presentation"

This test demonstrates finding content similar to a specific talk by using the talk title as reference. The tool will analyze the talk's abstract and metadata to find semantically similar presentations.

In [3]:
# Test 2: use an existing talk's title as the similarity reference
result2 = find_similar_content.invoke(
    dict(reference_talk_title="LLMs, from Playgrounds to Production-ready Pipelines")
)
print("=== Similar to LLMs Production Pipeline Talk ===", result2, sep="\n")

=== Similar to LLMs Production Pipeline Talk ===
{'similar_talks': [{'talk_id': 'c9d53afb-d1aa-595c-9b5a-fb09b89c29b8', 'title': 'OpenFL: A Federated Learning Project to Power (and secure) Your Projects', 'speaker': 'Ezequiel Lanza', 'youtube_url': 'https://www.youtube.com/watch?v=IpOIQ994vko', 'category': 'Security and Privacy', 'views': 344, 'similarity_score': 0.368, 'similarity_type': 'content', 'similarity_reason': 'Similar content themes (score: 0.368)', 'matching_content': 'OpenFL: A Federated Learning Project to Power (and secure) Your Projects | OpenFL is a Python 3 framework for Federated Learning. Designed to be flexible, extensible and easily learnable tool for data...'}, {'talk_id': 'fc3efe32-e399-5dfc-9cd5-70912f545122', 'title': 'A Practical Guide to Efficient AI', 'speaker': 'Shelby Heinecke', 'youtube_url': 'https://www.youtube.com/watch?v=3HS0Cz_WFHM', 'category': 'Performance optimization and efficiency', 'views': 21, 'similarity_score': 0.363, 'similarity_type': 'co

### Test 3: Query with Date Filtering - Recent MLOps Trends

**Scenario**: User wants to find recent talks about MLOps practices from 2023 onwards  
**Query**: "Find recent content about MLOps deployment and monitoring practices"  
**Filter**: Only talks published from 2023 onwards

This test combines natural language semantic search with temporal filtering to focus on recent developments in MLOps practices.

In [4]:
# Test 3: MLOps reference query combined with a date_from lower bound of 2023-01-01
result3 = find_similar_content.invoke(
    dict(
        reference_query="MLOps deployment monitoring practices production systems",
        date_from="2023-01-01",
        limit=8,
    )
)
print("=== Recent MLOps Content (2023+) ===", result3, sep="\n")

=== Recent MLOps Content (2023+) ===
{'similar_talks': [{'talk_id': '9cc32db2-9f03-591a-88e4-1b0def8fdba0', 'title': 'How do you scale to billions of fine-tuned LLMs?', 'speaker': 'Jamie Dborin', 'youtube_url': 'https://www.youtube.com/watch?v=JGzas8cfrtw', 'category': 'Performance optimization and efficiency', 'views': 49, 'similarity_score': 0.504, 'similarity_type': 'content', 'similarity_reason': 'Similar content themes (score: 0.504)', 'matching_content': 'How do you scale to billions of fine-tuned LLMs? | Batched LORA Inference, a method gaining popularity that could be used to scale to billions of personalized, finetuned llms without paying the comput...'}, {'talk_id': '7da06749-38e6-59c8-b00d-e39b115894d0', 'title': 'Large Language Model Training and Serving at LinkedIn', 'speaker': 'Dre Olgiati', 'youtube_url': 'https://www.youtube.com/watch?v=yx_BKcAPoQs', 'category': 'Deployment and integration', 'views': 18, 'similarity_score': 0.502, 'similarity_type': 'content', 'similari

### Test 4: Find Similar Talks But Exclude Same Speaker

**Scenario**: User likes a talk but wants diverse perspectives from different speakers  
**Reference Talk**: "Multimodal Agents You Can Deploy Anywhere"  
**Query**: "Find similar talks but from different speakers for diverse viewpoints"

This test finds content similar to a reference talk but excludes talks by the same speaker to provide diverse perspectives on the topic.

In [5]:
# Test 4: talk-title reference with exclude_same_speaker switched on
result4 = find_similar_content.invoke(
    dict(
        reference_talk_title="Multimodal Agents You Can Deploy Anywhere",
        exclude_same_speaker=True,
        limit=6,
    )
)
print("=== Similar to Multimodal Agents (Different Speakers) ===", result4, sep="\n")

=== Similar to Multimodal Agents (Different Speakers) ===
{'similar_talks': [{'talk_id': '5ec5c69a-ad4d-5105-9a02-b9d147b0e812', 'title': 'Open-Ended and AI-Generating Algorithms in the Era of Foundation Models', 'speaker': 'Jeff Clune', 'youtube_url': 'https://www.youtube.com/watch?v=N8EqPWRp5cg', 'category': 'Future trends', 'views': 18, 'similarity_score': 0.405, 'similarity_type': 'content', 'similarity_reason': 'Similar content themes (score: 0.405)', 'matching_content': 'Open-Ended and AI-Generating Algorithms in the Era of Foundation Models | Open-Ended and AI-Generating Algorithms in the Era of Foundation Models\n\nFoundation models (e.g. large language models) create...'}, {'talk_id': '7da06749-38e6-59c8-b00d-e39b115894d0', 'title': 'Large Language Model Training and Serving at LinkedIn', 'speaker': 'Dre Olgiati', 'youtube_url': 'https://www.youtube.com/watch?v=yx_BKcAPoQs', 'category': 'Deployment and integration', 'views': 18, 'similarity_score': 0.403, 'similarity_type': 'c

### Test 5: High Similarity Threshold - Only Highly Related Content

**Scenario**: User wants to filter out loosely related content by enforcing a minimum similarity score  
**Query**: "AI Agents"  
**Threshold**: Minimum similarity of 0.3 (the scores recorded in this notebook's outputs peak at roughly 0.5, so a 0.8 threshold would likely return nothing)

This test applies a minimum similarity threshold so that only results scoring at or above it are returned, filtering out loosely similar results.

In [6]:
# Test 5: Similarity Threshold
# The header used to claim "min 0.8" while the call passed 0.3. The label is now
# built from the same value that is sent to the tool, so the two cannot drift apart.
min_similarity_threshold = 0.3
result5 = find_similar_content.invoke({
    "reference_query": "AI Agents",
    "min_similarity": min_similarity_threshold,
    "limit": 5
})
print(f"=== Similar Content (min {min_similarity_threshold} similarity) ===")
print(result5)

=== Highly Similar Content (min 0.8 similarity) ===
{'similar_talks': [{'talk_id': 'dacfab95-57e8-5086-bc22-74b86cf3fbb3', 'title': 'GenAI Investing in 2024', 'speaker': 'Margo Wu', 'youtube_url': 'https://www.youtube.com/watch?v=QBNX72rckbc', 'category': 'Future trends', 'views': 20, 'similarity_score': 0.444, 'similarity_type': 'content', 'similarity_reason': 'Similar content themes (score: 0.444)', 'matching_content': 'GenAI Investing in 2024 | An updated overview on the Gen Ai market landscape and investment activity, along with investor insights for fundraising. | An updated overview on the Gen Ai landscape and in...'}, {'talk_id': 'f9b8beb1-45bb-560c-b75a-745ad5759cac', 'title': 'Understanding\xa0 Where Generative AI fits into Business', 'speaker': 'Miguel Mendez, Anand Nimkar, Preeti Shivpuri', 'youtube_url': 'https://www.youtube.com/watch?v=1l2W8YlvVM4', 'category': 'Business and stakeholder alignment', 'views': 299, 'similarity_score': 0.443, 'similarity_type': 'content', 'sim

### Test 6: Category-Specific Similarity Search

**Scenario**: User wants similar talks but only within a specific category  
**Query**: "Find content about deployment strategies"  
**Filter**: Only talks in "Deployment and integration" category

This test constrains the similarity search to a specific category, useful when users want to stay within a particular domain or track.

In [7]:
# Test 6: deployment-themed reference query limited to one category
result6 = find_similar_content.invoke(
    dict(
        reference_query="deployment strategies production systems CI/CD pipelines",
        category="Deployment and integration",
    )
)
print("=== Deployment Content (Category Filtered) ===", result6, sep="\n")

=== Deployment Content (Category Filtered) ===
{'similar_talks': [{'talk_id': '6b6b06f2-551b-5e81-8f5e-598c9b5e32d4', 'title': 'Deploying LLMs on Kubernetes environments', 'speaker': 'Arthur Vitui', 'youtube_url': 'https://www.youtube.com/watch?v=FoEWvU5UbsA', 'category': 'Deployment and integration', 'views': 46, 'similarity_score': 0.494, 'similarity_type': 'content', 'similarity_reason': 'Similar content themes (score: 0.494)', 'matching_content': 'Deploying LLMs on Kubernetes environments | Learn how to deploy LLMs on Kubernetes environments and use them to enhance your intelligent applications ecosystem with chatbots to talk to your documentat...'}, {'talk_id': '7da06749-38e6-59c8-b00d-e39b115894d0', 'title': 'Large Language Model Training and Serving at LinkedIn', 'speaker': 'Dre Olgiati', 'youtube_url': 'https://www.youtube.com/watch?v=yx_BKcAPoQs', 'category': 'Deployment and integration', 'views': 18, 'similarity_score': 0.49, 'similarity_type': 'content', 'similarity_reason':

### Test 7: Event-Specific Similar Content 

**Scenario**: User wants to explore similar content but only from a specific event  
**Query**: Find content about model optimization techniques  
**Filter**: Only talks from "MLOps & GenAI World 2024" event

This test demonstrates event-based filtering, useful when users want to see how a topic was covered within a particular conference or event.

In [8]:
# Test 7: model-optimization reference query limited to a single event
result7 = find_similar_content.invoke(
    dict(
        reference_query="model optimization performance tuning efficiency",
        event_name="MLOps & GenAI World 2024",
    )
)
print("=== Model Optimization (MLOps & GenAI World 2024) ===", result7, sep="\n")

=== Model Optimization (MLOps & GenAI World 2024) ===
{'similar_talks': [{'talk_id': 'fc3efe32-e399-5dfc-9cd5-70912f545122', 'title': 'A Practical Guide to Efficient AI', 'speaker': 'Shelby Heinecke', 'youtube_url': 'https://www.youtube.com/watch?v=3HS0Cz_WFHM', 'category': 'Performance optimization and efficiency', 'views': 21, 'similarity_score': 0.482, 'similarity_type': 'content', 'similarity_reason': 'Similar content themes (score: 0.482)', 'matching_content': 'A Practical Guide to Efficient AI | In the past two years, we’ve witnessed a whirlwind of AI breakthroughs powered by extremely large and resource-demanding models. And as engineers and practitioners,...'}, {'talk_id': '7da06749-38e6-59c8-b00d-e39b115894d0', 'title': 'Large Language Model Training and Serving at LinkedIn', 'speaker': 'Dre Olgiati', 'youtube_url': 'https://www.youtube.com/watch?v=yx_BKcAPoQs', 'category': 'Deployment and integration', 'views': 18, 'similarity_score': 0.464, 'similarity_type': 'content', 'sim

### Test 8: Speaker-Based Similarity - Other Talks by Same Speaker

**Scenario**: User likes a speaker's presentation style and wants to see their other talks  
**Reference Talk**: Use a known talk title as reference  
**Similarity Type**: "speaker" - focuses on finding other talks by the same speaker

This test demonstrates speaker-based similarity where the tool finds other talks by the same speaker, useful for discovering a speaker's body of work.

In [9]:
# Test 8: talk-title reference with similarity_type set to "speaker"
result8 = find_similar_content.invoke(
    dict(
        reference_talk_title="Looking into AI/ML from a Venture Capital lens",
        similarity_type="speaker",
    )
)
print("=== Other Talks by Same Speaker ===", result8, sep="\n")

=== Other Talks by Same Speaker ===
{'similar_talks': [], 'reference_info': {'talk_id': 'da323948-1140-5d4c-89ed-2ed9ec6342ef', 'title': 'Looking into AI/ML from a Venture Capital lens', 'speaker': 'Yatong Li, Ryan Shannon, Vik Pant, PhD, Michelle Yu', 'category': 'Business and stakeholder alignment', 'youtube_url': 'https://www.youtube.com/watch?v=6X0sqZ7Metg', 'abstract': "As a Toronto based Canadian VC focusing on global investments, Sixty Degree Capital invests in software that’s transforming industries, and the digital infrastructure that supports it. We have invested in lots of exciting portfolio companies including Arctic Wolf in the Cybersecurity space, DataGrail innovating on Data Privacy, Paperspace providing MLOps platform, MacroMeta solving Edge Computing bottlenecks, Pragma work in the Gaming Infrastructure space, Tact.ai and Radius Agent as Vertical SaaS products etc.). We believe AI/ML is a fundamental building block for next generation software, so Sixty Degree has spen

### Test 9: Comprehensive Similarity Analysis - All Types

**Scenario**: User wants a comprehensive analysis including content, speaker, and topic similarity  
**Reference Talk**: "Era of Multimodal AI & Reasoning"  
**Similarity Type**: "all" - combines multiple similarity approaches

This test uses the comprehensive "all" similarity type that combines content-based, speaker-based, and topic-based similarity for a holistic recommendation.

In [10]:
# Test 9: talk-title reference with similarity_type "all", up to twelve results
result9 = find_similar_content.invoke(
    dict(
        reference_talk_title="Era of Multimodal AI & Reasoning",
        similarity_type="all",
        limit=12,
    )
)
print("=== Comprehensive Similarity Analysis ===", result9, sep="\n")

=== Comprehensive Similarity Analysis ===
{'similar_talks': [{'talk_id': '4b5ed1f3-6dc6-5b0f-9dfb-03f9e027f218', 'title': 'AI Tools Under Control: Keeping Your Agents Secure and Reliable', 'speaker': 'Bar Chen', 'youtube_url': 'https://www.youtube.com/watch?v=poqhv4hPTpA', 'category': 'Security and Privacy', 'views': 119, 'similarity_score': 0.408, 'similarity_type': 'content', 'similarity_reason': 'Similar content themes (score: 0.408)', 'matching_content': 'AI Tools Under Control: Keeping Your Agents Secure and Reliable | This session focuses on AI tools and the importance of keeping them secure and reliable. We’ll discuss the main security challenges th...'}, {'talk_id': '8a1b1e9a-6b99-5282-9032-61bf3da02ccb', 'title': 'Unraveling Long Context: Existing Methods, Challenges, and Future Directions', 'speaker': 'Bowen Yang', 'youtube_url': 'https://www.youtube.com/watch?v=jT0g7-GUMGY', 'category': 'Model dev, training, arch.', 'views': 35, 'similarity_score': 0.407, 'similarity_type': 

### Test 10: Combined Filters - Date Range + Category + Exclusions

**Scenario**: Complex search with multiple constraints  
**Query**: "LLMs in Production Pipelines"  
**Filters**: 2024 timeframe (2024-01-01 to 2024-12-31), "Deployment and integration" category, minimum similarity of 0.3

This test demonstrates the tool's ability to handle multiple constraints simultaneously for highly targeted similarity searches.

In [14]:
# Test 10: one query combining a date window, a category and a similarity floor
result10 = find_similar_content.invoke(
    dict(
        reference_query="LLMs in Production Pipelines",
        date_from="2024-01-01",
        date_to="2024-12-31",
        category="Deployment and integration",
        min_similarity=0.3,
        limit=7,
    )
)
print("=== Complex Multi-Filter Search ===", result10, sep="\n")

=== Complex Multi-Filter Search ===
{'similar_talks': [{'talk_id': '0c7e727c-214e-5fc1-954e-3aa19bd8a513', 'title': 'Deploying and Evaluating RAG pipelines with Lightning Studios', 'speaker': 'Rob Levy', 'youtube_url': 'https://www.youtube.com/watch?v=JO2mJ00rCkU', 'category': 'Deployment and integration', 'views': 93, 'similarity_score': 0.442, 'similarity_type': 'content', 'similarity_reason': 'Similar content themes (score: 0.442)', 'matching_content': 'Deploying and Evaluating RAG pipelines with Lightning Studios | Learn how to use Lightning Studios to quickly deploy AI agents and accelerate your evaluation of RAG pipelines. | Learn how to use Light...'}, {'talk_id': 'e2da4021-9d57-5dc4-8e2a-9f37948d1fd5', 'title': 'Serving GenAI Workload At Scale With LitServe', 'speaker': 'Aniket Maurya', 'youtube_url': 'https://www.youtube.com/watch?v=ClA0OwE4Zs0', 'category': 'Deployment and integration', 'views': 33, 'similarity_score': 0.437, 'similarity_type': 'content', 'similarity_reason':

### Test 11: Cloud and Infrastructure Focus

**Scenario**: User wants to explore cloud computing and infrastructure content  
**Query**: "Cloud infrastructure, Kubernetes, containerization, and scalability"  
**Approach**: Semantic search focused on cloud technologies

This test targets cloud and infrastructure-related content using semantic similarity to find talks about cloud platforms, orchestration, and scalability.

In [12]:
# Test 11: keyword-rich reference query on cloud and infrastructure topics
result11 = find_similar_content.invoke(
    dict(
        reference_query="cloud infrastructure Kubernetes containerization Docker scalability microservices",
        limit=8,
    )
)
print("=== Cloud and Infrastructure Content ===", result11, sep="\n")

=== Cloud and Infrastructure Content ===
{'similar_talks': [{'talk_id': '7da06749-38e6-59c8-b00d-e39b115894d0', 'title': 'Large Language Model Training and Serving at LinkedIn', 'speaker': 'Dre Olgiati', 'youtube_url': 'https://www.youtube.com/watch?v=yx_BKcAPoQs', 'category': 'Deployment and integration', 'views': 18, 'similarity_score': 0.463, 'similarity_type': 'content', 'similarity_reason': 'Similar content themes (score: 0.463)', 'matching_content': 'Large Language Model Training and Serving at LinkedIn | In this talk, Dre will describe some of the fundamental challenges and solutions faced by the LinkedIn team as they build innovative products ba...'}, {'talk_id': '02f5e445-8821-5cb1-bada-88586b0f9ac8', 'title': 'Leverage Kubernetes To Optimize the Utilization of Your AI Accelerators', 'speaker': 'Nathan Beach', 'youtube_url': 'https://www.youtube.com/watch?v=5jdZksHaJ_Q', 'category': 'Performance optimization and efficiency', 'views': 12, 'similarity_score': 0.462, 'similarity_

### Test 12: Error Handling - Non-Existent Talk Reference

**Scenario**: Test error handling when referencing a talk that doesn't exist  
**Reference Talk**: "This Talk Does Not Exist in Database"  
**Expected**: Graceful error handling with informative message

This test validates the tool's error handling capabilities when provided with invalid references, ensuring robust behavior in edge cases.

In [13]:
# Test 12: Error Handling Test
# A title that matches no talk must not raise and must yield no similar talks.
result12 = find_similar_content.invoke({
    "reference_talk_title": "This Talk Does Not Exist in Database"
})
print("=== Error Handling Test ===")
print(result12)

# Turn the stated expectation into a check instead of relying on reading the output by eye.
assert not result12.get("similar_talks"), (
    "a non-existent reference talk should produce no similar talks"
)
# NOTE(review): the recorded run reports 'error': '0', which says nothing about the
# missing talk. Presumably a stringified lookup exception inside the tool; confirm
# there and surface a clearer message.

=== Error Handling Test ===
{'similar_talks': [], 'reference_info': {}, 'similarity_analysis': 'Similarity analysis failed', 'total_found': 0, 'filters_applied': [], 'success': False, 'error': '0'}
