In [1]:
# Import required libraries and tools
import os
import sys
import json
from pprint import pprint

# Load environment variables
from dotenv import load_dotenv
load_dotenv()

# Add tools directory to path
sys.path.append('.')

# Import all our tools
from search_talks_by_filters import search_talks_by_filters
from search_talks_semantically import search_talks_semantically
from analyze_speaker_activity import analyze_speaker_activity
from get_talk_details import get_talk_details
from find_similar_content import find_similar_content
from analyze_topics_and_trends import analyze_topics_and_trends

# Report that the imports above succeeded, then show only whether the
# APERTUREDB_KEY environment variable is set (never its value).
key_status = 'Yes' if os.getenv('APERTUREDB_KEY') else 'No'
print("✅ All tools imported successfully!")
print(f"🔑 ApertureDB Key configured: {key_status}")

# Helper function for pretty printing results
def print_results(result, max_results=5):
    """Pretty print tool results with truncation.

    When ``result`` is a dict that contains a ``'results'`` key, a short
    header is printed (total found, plus the query summary and sort info
    when those keys hold a truthy value), followed by at most
    ``max_results`` entries showing title, speaker, company, views and,
    when present, the published date. Any other value is passed to
    ``pprint`` unchanged.

    Args:
        result: Tool output to display.
        max_results: Maximum number of entries to list (default 5).

    Returns:
        None. Everything is written to stdout.
    """
    if not (isinstance(result, dict) and 'results' in result):
        pprint(result)
        return

    print(f"Total found: {result.get('total_found', 'Unknown')}")
    if result.get('query_summary'):
        print(f"Query: {result['query_summary']}")
    if result.get('sort_info'):
        print(f"Sorting: {result['sort_info']}")

    # `or []` keeps a payload with 'results': None from crashing the slice.
    results = (result['results'] or [])[:max_results]
    for i, talk in enumerate(results, 1):
        print(f"\n{i}. {talk.get('title', 'No Title')}")
        print(f"   Speaker: {talk.get('speaker', 'Unknown')}")
        print(f"   Company: {talk.get('company', 'Unknown')}")

        # The .get default only covers a missing key; an explicit None or a
        # non-numeric value would make the ',' format spec raise.
        views = talk.get('views')
        if views is None:
            views = 0
        if isinstance(views, (int, float)):
            views_text = f"{views:,}"
        else:
            views_text = str(views)
        print(f"   Views: {views_text}")

        if talk.get('published_date'):
            print(f"   Date: {talk['published_date']}")

✅ All tools imported successfully!
🔑 ApertureDB Key configured: Yes


## Tool 3: `analyze_speaker_activity` - Comprehensive Speaker Analytics Testing

This tool analyzes speaker activity patterns, frequency, topics, and company representation across the MLOps events dataset. It provides comprehensive speaker-centric analytics including talk counts, topic diversity, company breakdown, and repeat speaker identification.

### Test 1: Individual Speaker Deep Dive Analysis
**Query**: "Analyze all activity and talks by Chip Huyen - how many talks, topics covered, engagement metrics"

This performs a detailed analysis of a specific speaker, showing their complete talk history, view statistics, topic diversity, and presentation timeline. 

In [2]:
# Test 1 - single-speaker query: filter on speaker_name and request every
# analysis type, then dump the raw response.
result1 = analyze_speaker_activity.invoke(
    dict(speaker_name="Chip Huyen", analysis_type="all")
)

print("=== INDIVIDUAL SPEAKER ANALYSIS ===")
pprint(result1)

=== INDIVIDUAL SPEAKER ANALYSIS ===
{'analysis_summary': "Detailed analysis for speaker 'Chip Huyen'",
 'company_breakdown': {},
 'repeat_speakers': [],
 'speaker_stats': [{'avg_views': 844,
                    'categories_covered': ['Model dev, training, arch.'],
                    'companies': ['Claypot AI'],
                    'events_participated': ['TMLS 2022'],
                    'speaker_name': 'Chip Huyen',
                    'talk_list': [{'abstract': 'Fresh data beats stale data '
                                               'for machine learning '
                                               'applications. This talk '
                                               'discusses the value of fresh '
                                               'data as well as different '
                                               'types of architecture ...',
                                   'category': 'Model dev, training, arch.',
                                   'date': '202

### Test 2: Repeat Speaker Discovery
**Query**: "Find all speakers who presented multiple times (2+ talks) and rank by frequency"

This identifies repeat speakers in the dataset, showing who the most active presenters are. The `min_talk_count=2` filter ensures we only get speakers with multiple presentations.

In [3]:
# Test 2 - repeat speakers: pass min_talk_count=2 with all analysis types
# and cap the response at 15 entries.
result2 = analyze_speaker_activity.invoke(
    dict(min_talk_count=2, analysis_type="all", limit=15)
)

print("=== REPEAT SPEAKERS ANALYSIS ===")
pprint(result2)

=== REPEAT SPEAKERS ANALYSIS ===
{'analysis_summary': 'Speaker activity analysis: speaker talk counts, company '
                     'representation',
 'company_breakdown': [{'company': 'Aporia', 'speaker_count': 7},
                       {'company': 'Outerbounds', 'speaker_count': 5},
                       {'company': 'Lightning AI', 'speaker_count': 4},
                       {'company': 'Loblaw Digital', 'speaker_count': 4},
                       {'company': 'Scotiabank', 'speaker_count': 4},
                       {'company': 'Google', 'speaker_count': 3},
                       {'company': 'Wealthsimple', 'speaker_count': 3},
                       {'company': 'RBC', 'speaker_count': 3},
                       {'company': 'Armilla AI', 'speaker_count': 3},
                       {'company': 'Hugging Face', 'speaker_count': 3},
                       {'company': 'Vector Institute', 'speaker_count': 3},
                       {'company': 'Seldon', 'speaker_count': 3},
          

### Test 3: Corporate Representation Analysis
**Query**: "Analyze speaker representation from Google - how many speakers, their talk counts, and engagement"

This focuses on speakers from a specific company to understand corporate representation in the conference. Useful for analyzing which companies are most active in the MLOps community.

In [4]:
# Test 3 - company filter: restrict the query to company_name "Google",
# request all analysis types, limit 10.
result3 = analyze_speaker_activity.invoke(
    dict(company_name="Google", analysis_type="all", limit=10)
)

print("=== GOOGLE SPEAKERS ANALYSIS ===")
pprint(result3)

=== GOOGLE SPEAKERS ANALYSIS ===
{'analysis_summary': 'Speaker activity analysis: speaker talk counts, company '
                     "representation, filtered by company 'Google'",
 'company_breakdown': [{'company': 'Google', 'speaker_count': 3}],
 'repeat_speakers': [],
 'speaker_stats': [{'avg_views': 12,
                    'categories': ['Performance optimization and efficiency'],
                    'companies': ['Google'],
                    'events': ['MLOps & GenAI World 2024'],
                    'speaker_name': 'Nathan Beach',
                    'total_talks': 1,
                    'total_views': 12},
                   {'avg_views': 26,
                    'categories': ['Deployment and integration'],
                    'companies': ['Google'],
                    'events': ['MLOps & GenAI World 2024'],
                    'speaker_name': 'Anu Reddy',
                    'total_talks': 1,
                    'total_views': 26},
                   {'avg_views': 2372,
  

### Test 4: Overall Company Breakdown Analysis
**Query**: "Show me the company representation breakdown - which companies have the most speakers"

This provides a comprehensive view of company representation across all talks, showing which organizations are most represented in the MLOps community. It uses the `companies` analysis type.

In [5]:
# Test 4 - company breakdown: no filters, analysis_type "companies",
# limit 25.
result4 = analyze_speaker_activity.invoke(
    dict(analysis_type="companies", limit=25)
)

print("=== COMPANY BREAKDOWN ANALYSIS ===")
pprint(result4)

=== COMPANY BREAKDOWN ANALYSIS ===
{'analysis_summary': 'Speaker activity analysis: company representation',
 'company_breakdown': [{'company': 'Aporia', 'speaker_count': 7},
                       {'company': 'Outerbounds', 'speaker_count': 5},
                       {'company': 'Lightning AI', 'speaker_count': 4},
                       {'company': 'Loblaw Digital', 'speaker_count': 4},
                       {'company': 'Scotiabank', 'speaker_count': 4},
                       {'company': 'Google', 'speaker_count': 3},
                       {'company': 'Wealthsimple', 'speaker_count': 3},
                       {'company': 'RBC', 'speaker_count': 3},
                       {'company': 'Armilla AI', 'speaker_count': 3},
                       {'company': 'Hugging Face', 'speaker_count': 3},
                       {'company': 'Vector Institute', 'speaker_count': 3},
                       {'company': 'Seldon', 'speaker_count': 3},
                       {'company': 'DVC', 'speaker_co

### Test 5: Recent Speaker Activity Analysis (2024)
**Query**: "Analyze speaker activity for 2024 - who were the most active recent presenters"

This filters speaker activity to recent timeframes to understand current trends and identify speakers who have been active in 2024. Useful for finding current thought leaders.

In [6]:
# Test 5 - activity from 2024 onward: date_from "2024-01-01", all analysis
# types, limit 12.
# NOTE(review): only date_from is supplied, so talks dated after 2024 are
# presumably included as well - confirm whether a date_to bound is wanted
# to match the "2024" banner below.
result5 = analyze_speaker_activity.invoke(
    dict(date_from="2024-01-01", analysis_type="all", limit=12)
)

print("=== 2024 SPEAKER ACTIVITY ===")
pprint(result5)

=== 2024 SPEAKER ACTIVITY ===
{'analysis_summary': 'Speaker activity analysis: speaker talk counts, company '
                     'representation, filtered by from 2024-01-01',
 'company_breakdown': [{'company': 'Google', 'speaker_count': 3},
                       {'company': 'Lightning AI', 'speaker_count': 3},
                       {'company': 'Outerbounds', 'speaker_count': 3},
                       {'company': 'Aporia', 'speaker_count': 3},
                       {'company': 'AI Makerspace', 'speaker_count': 3},
                       {'company': 'Wealthsimple', 'speaker_count': 2},
                       {'company': 'LakeFS', 'speaker_count': 2},
                       {'company': 'Grammarly', 'speaker_count': 2},
                       {'company': 'Google Cloud', 'speaker_count': 2},
                       {'company': 'TitanML', 'speaker_count': 2},
                       {'company': 'Weights & Biases', 'speaker_count': 2},
                       {'company': 'Arya.ai', 'speak

### Test 6: Event-Specific Speaker Analysis
**Query**: "Analyze speakers from MLOps & GenAI World 2024 event - who presented and how many talks"

This focuses on speaker activity within a specific event, useful for understanding event participation patterns and identifying key contributors to particular conferences.

In [7]:
# Test 6 - event filter: restrict the query to the
# "MLOps & GenAI World 2024" event, all analysis types, limit 15.
result6 = analyze_speaker_activity.invoke(
    dict(event_name="MLOps & GenAI World 2024", analysis_type="all", limit=15)
)

print("=== EVENT-SPECIFIC SPEAKER ANALYSIS ===")
pprint(result6)

=== EVENT-SPECIFIC SPEAKER ANALYSIS ===
{'analysis_summary': 'Speaker activity analysis: speaker talk counts, company '
                     "representation, filtered by event 'MLOps & GenAI World "
                     "2024'",
 'company_breakdown': [{'company': 'Google', 'speaker_count': 2},
                       {'company': 'Google Cloud', 'speaker_count': 2},
                       {'company': 'Weights & Biases', 'speaker_count': 2},
                       {'company': 'Arya.ai', 'speaker_count': 2},
                       {'company': 'Outerbounds', 'speaker_count': 2},
                       {'company': 'AI Makerspace', 'speaker_count': 2},
                       {'company': 'Aporia', 'speaker_count': 2},
                       {'company': 'Roche', 'speaker_count': 1},
                       {'company': 'Liquid AI', 'speaker_count': 1},
                       {'company': 'Intuit', 'speaker_count': 1},
                       {'company': 'SambaNova Systems', 'speaker_count': 1},
   

### Test 7: Category-Focused Speaker Analysis
**Query**: "Who are the most active speakers in Model dev, training, arch. category - find domain experts"

This analyzes speaker activity within a specific topic category to identify domain experts and thought leaders in particular areas like MLOps, deployment, or future trends.

In [8]:
# Test 7 - category filter: restrict the query to the
# "Model dev, training, arch." category, all analysis types, limit 10.
# NOTE(review): the banner below says "MLOPS CATEGORY" although the filter
# is "Model dev, training, arch." - consider renaming the banner on the
# next re-run.
result7 = analyze_speaker_activity.invoke(
    dict(category="Model dev, training, arch.", analysis_type="all", limit=10)
)

print("=== MLOPS CATEGORY SPEAKER ANALYSIS ===")
pprint(result7)

=== MLOPS CATEGORY SPEAKER ANALYSIS ===
{'analysis_summary': 'Speaker activity analysis: speaker talk counts, company '
                     "representation, filtered by category 'Model dev, "
                     "training, arch.'",
 'company_breakdown': [{'company': 'AI Makerspace', 'speaker_count': 3},
                       {'company': 'Google Cloud', 'speaker_count': 2},
                       {'company': 'Lightning AI', 'speaker_count': 2},
                       {'company': 'Union.ai', 'speaker_count': 2},
                       {'company': 'Liquid AI', 'speaker_count': 1},
                       {'company': 'Voiceflow', 'speaker_count': 1},
                       {'company': 'SambaNova Systems', 'speaker_count': 1},
                       {'company': 'Deepgram', 'speaker_count': 1},
                       {'company': 'Toyota', 'speaker_count': 1},
                       {'company': 'Groq', 'speaker_count': 1},
                       {'company': 'RBC', 'speaker_count': 1},
     

### Test 8: High-Frequency Speaker Analysis
**Query**: "Find super active speakers who gave 3+ presentations - identify key thought leaders"

This identifies the most prolific speakers who have given multiple presentations, helping find key thought leaders and most active community members.

In [9]:
# Test 8 - high-frequency speakers: min_talk_count=3 with the
# "talk_count" analysis type, limit 8.
result8 = analyze_speaker_activity.invoke(
    dict(min_talk_count=3, analysis_type="talk_count", limit=8)
)

print("=== HIGH-FREQUENCY SPEAKERS (3+ talks) ===")
pprint(result8)

=== HIGH-FREQUENCY SPEAKERS (3+ talks) ===
{'analysis_summary': 'Speaker activity analysis: speaker talk counts',
 'company_breakdown': {},
 'repeat_speakers': [{'avg_views': 510,
                      'categories': ['Deployment and integration',
                                     'Model dev, training, arch.'],
                      'companies': ['Lightning AI'],
                      'events': ['MLOps & GenAI World 2024',
                                 'MLOps & GenAI World 2023',
                                 'TMLS 2024'],
                      'speaker_name': 'Aniket Maurya',
                      'total_talks': 3,
                      'total_views': 1529},
                     {'avg_views': 28,
                      'categories': ['Ethics, governance compliance'],
                      'companies': ['Armilla AI', 'AutoAlign AI'],
                      'events': ['TMLS 2022',
                                 'MLOps & GenAI World 2024',
                                 'TMLS 2

### Test 9: Multi-Company Analysis (Microsoft Focus)
**Query**: "Analyze Microsoft's speaker representation and compare to the broader ecosystem"

This examines Microsoft's presence in the MLOps community through their speakers, useful for competitive analysis and understanding corporate participation patterns.

In [10]:
# Test 9 - Microsoft focus: company_name "Microsoft" with the "companies"
# analysis type, limit 15.
# NOTE(review): this cell passes a single company filter only; any
# comparison against other companies has to be done against a separate
# unfiltered query.
result9 = analyze_speaker_activity.invoke(
    dict(company_name="Microsoft", analysis_type="companies", limit=15)
)

print("=== MICROSOFT SPEAKER ANALYSIS ===")
pprint(result9)

=== MICROSOFT SPEAKER ANALYSIS ===
{'analysis_summary': 'Speaker activity analysis: company representation, '
                     "filtered by company 'Microsoft'",
 'company_breakdown': [{'company': 'Microsoft', 'speaker_count': 1}],
 'repeat_speakers': [],
 'speaker_stats': [{'avg_views': 34,
                    'categories': ['Deployment and integration'],
                    'companies': ['Microsoft'],
                    'events': ['MLOps & GenAI World 2024'],
                    'speaker_name': 'Pablo  Salvador Lopez',
                    'total_talks': 1,
                    'total_views': 34}],
 'success': True,
 'topic_analysis': {},
 'total_speakers': 1}


### Test 10: Quarterly Speaker Activity Analysis
**Query**: "Analyze speaker activity for Q2 2024 (April-June) to understand seasonal patterns"

This examines speaker activity within a specific quarter to identify seasonal trends, event clustering, and temporal patterns in conference participation.

In [11]:
# Test 10 - Q2 2024 window: date_from "2024-04-01" through date_to
# "2024-06-30", all analysis types, limit 12.
result10 = analyze_speaker_activity.invoke(
    dict(
        date_from="2024-04-01",
        date_to="2024-06-30",
        analysis_type="all",
        limit=12,
    )
)

print("=== Q2 2024 SPEAKER ACTIVITY ===")
pprint(result10)

=== Q2 2024 SPEAKER ACTIVITY ===
{'analysis_summary': 'Speaker activity analysis: speaker talk counts, company '
                     'representation, filtered by from 2024-04-01, until '
                     '2024-06-30',
 'company_breakdown': [{'company': 'Instacart', 'speaker_count': 1},
                       {'company': 'Stitch Fix', 'speaker_count': 1},
                       {'company': 'Syngenta', 'speaker_count': 1},
                       {'company': 'TitanML', 'speaker_count': 1},
                       {'company': 'Grammarly', 'speaker_count': 1},
                       {'company': 'Ford Motor Company', 'speaker_count': 1},
                       {'company': 'Canva', 'speaker_count': 1},
                       {'company': 'Stripe', 'speaker_count': 1},
                       {'company': 'Aporia', 'speaker_count': 1},
                       {'company': 'Abnormal Security', 'speaker_count': 1},
                       {'company': 'DVC', 'speaker_count': 1},
                   

### Test 11: Future Trends Specialists Analysis
**Query**: "Find speakers who focus on future trends - identify forward-thinking thought leaders"

This identifies speakers who specialize in future trends and emerging technologies, useful for finding visionaries and strategic thinkers in the MLOps space.

In [12]:
# Test 11 - "Future trends" category: all analysis types, min_talk_count=1,
# limit 8.
result11 = analyze_speaker_activity.invoke(
    dict(
        category="Future trends",
        analysis_type="all",
        min_talk_count=1,
        limit=8,
    )
)

print("=== FUTURE TRENDS SPECIALISTS ===")
pprint(result11)

=== FUTURE TRENDS SPECIALISTS ===
{'analysis_summary': 'Speaker activity analysis: speaker talk counts, company '
                     "representation, filtered by category 'Future trends'",
 'company_breakdown': [{'company': 'University of British Columbia',
                        'speaker_count': 1},
                       {'company': 'CARTE, University of Toronto',
                        'speaker_count': 1},
                       {'company': 'Sematic', 'speaker_count': 1},
                       {'company': 'Tentorrent', 'speaker_count': 1},
                       {'company': 'Half Helix', 'speaker_count': 1},
                       {'company': 'University of Toronto', 'speaker_count': 1},
                       {'company': 'Ternary Data, LLC, VEOX Inc, Independent '
                                   'Data and AI Scientist',
                        'speaker_count': 1},
                       {'company': 'Georgian', 'speaker_count': 1},
                       {'company': 'WhyHow.

### Test 12: Top Speaker Engagement Analysis
**Query**: "Find the top 5 most active speakers across all events and analyze their impact"

This identifies the most prolific speakers in the entire dataset, providing a comprehensive view of who the key voices and thought leaders in the MLOps community are.

In [13]:
# Test 12 - top speakers: no filters, "talk_count" analysis type, limit 5.
result12 = analyze_speaker_activity.invoke(
    dict(analysis_type="talk_count", limit=5)
)

print("=== TOP 5 MOST ACTIVE SPEAKERS ===")
pprint(result12)

=== TOP 5 MOST ACTIVE SPEAKERS ===
{'analysis_summary': 'Speaker activity analysis: speaker talk counts',
 'company_breakdown': {},
 'repeat_speakers': [{'avg_views': 510,
                      'categories': ['Deployment and integration',
                                     'Model dev, training, arch.'],
                      'companies': ['Lightning AI'],
                      'events': ['MLOps & GenAI World 2024',
                                 'MLOps & GenAI World 2023',
                                 'TMLS 2024'],
                      'speaker_name': 'Aniket Maurya',
                      'total_talks': 3,
                      'total_views': 1529},
                     {'avg_views': 28,
                      'categories': ['Ethics, governance compliance'],
                      'companies': ['Armilla AI', 'AutoAlign AI'],
                      'events': ['TMLS 2022',
                                 'MLOps & GenAI World 2024',
                                 'TMLS 2023'],
 