# MLOps Events - Query Examples

This notebook demonstrates various query patterns for retrieving talks from ApertureDB using filters, relationships, and semantic search.

## Setup

### Install Dependencies

Install PyTorch, Sentence Transformers, and ApertureDB SDK.

In [1]:
!pip install -qU torch sentence-transformers aperturedb

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m486.6/486.6 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m141.9/141.9 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.2/47.2 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.8/137.8 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.3/139.3 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.0/14.0 MB[0m [31m88.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m71.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m56.4 MB/s[0m eta [36m0:00:

### Import Libraries

Import necessary libraries for embeddings and database operations.

In [5]:
import torch
from sentence_transformers import SentenceTransformer

from google.colab import userdata

from aperturedb.CommonLibrary import create_connector
from aperturedb.Utils import Utils
from aperturedb.ParallelLoader import ParallelLoader
from aperturedb import Connector

### Initialize Model & Connection

Configure ApertureDB connection and load the embedding model.

In [7]:
APERTUREDB_KEY=userdata.get('APERTUREDB_KEY')

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

model = SentenceTransformer("google/embeddinggemma-300m", device=DEVICE)
model.max_seq_length = 512

modules.json:   0%|          | 0.00/573 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/997 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/18.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/58.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.49k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.21G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.16M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/312 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/134 [00:00<?, ?B/s]

2_Dense/model.safetensors:   0%|          | 0.00/9.44M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/134 [00:00<?, ?B/s]

3_Dense/model.safetensors:   0%|          | 0.00/9.44M [00:00<?, ?B/s]

### Query Helper Function

Create a helper function to execute queries and display responses.

In [9]:
client = create_connector(key=APERTUREDB_KEY)

def run(q):
    resp, blobs = client.query(q)
    client.print_last_response()
    return resp

## Basic Filter Queries

### Query 1: Recent Talks

Find the 3 most recently published talks sorted by publication date.

In [10]:
run([{
  "FindEntity": {
    "with_class": "Talk",
    "sort": { "key": "yt_published_at", "order": "descending" },
    "limit": 3,
    "results": { "list": ["talk_id","talk_title","yt_published_at","youtube_url"] }
  }
}])


[
    {
        "FindEntity": {
            "entities": [
                {
                    "talk_id": "b792c10c-776b-5e2d-bb2e-3f25ea29f330",
                    "talk_title": "Building GenAI-Powered Apps: A Workshop for Software Engineers",
                    "youtube_url": "https://www.youtube.com/watch?v=tSIpREFVMXs",
                    "yt_published_at": {
                        "_date": "2025-02-13T00:00:00+00:00"
                    }
                },
                {
                    "talk_id": "36ad6014-7028-578d-800a-f969b7605df9",
                    "talk_title": "LLMidas' Touch; Safely adopting GenAI for production use-cases",
                    "youtube_url": "https://www.youtube.com/watch?v=A3KschpEU_g",
                    "yt_published_at": {
                        "_date": "2025-01-14T00:00:00+00:00"
                    }
                },
                {
                    "talk_id": "3bec210f-b106-5ff7-ab2c-3b467ff012d9",
                    "talk

[{'FindEntity': {'entities': [{'talk_id': 'b792c10c-776b-5e2d-bb2e-3f25ea29f330',
     'talk_title': 'Building GenAI-Powered Apps: A Workshop for Software Engineers',
     'youtube_url': 'https://www.youtube.com/watch?v=tSIpREFVMXs',
     'yt_published_at': {'_date': '2025-02-13T00:00:00+00:00'}},
    {'talk_id': '36ad6014-7028-578d-800a-f969b7605df9',
     'talk_title': "LLMidas' Touch; Safely adopting GenAI for production use-cases",
     'youtube_url': 'https://www.youtube.com/watch?v=A3KschpEU_g',
     'yt_published_at': {'_date': '2025-01-14T00:00:00+00:00'}},
    {'talk_id': '3bec210f-b106-5ff7-ab2c-3b467ff012d9',
     'talk_title': 'Optimizing AI/ML Workflows on Kubernetes: Advanced Techniques and Integration',
     'youtube_url': 'https://www.youtube.com/watch?v=grCvM9tkS7Q',
     'yt_published_at': {'_date': '2024-12-11T00:00:00+00:00'}}],
   'returned': 3,
   'status': 0}}]

### Query 2: Talks by Date Range

Count talks published in a specific date range (Aug-Dec 2024).

In [12]:
run([{
  "FindEntity": {
    "with_class": "Talk",
    "constraints": {
      "yt_published_at": [
        ">=", {"_date": "2024-08-01"},
        "<=", {"_date": "2024-12-31"}
      ]
    },
    "results": { "count": True }
  }
}])


[
    {
        "FindEntity": {
            "count": 128,
            "returned": 0,
            "status": 0
        }
    }
]


[{'FindEntity': {'count': 128, 'returned': 0, 'status': 0}}]

### Query 3: Top Viewed Talks

Find the top 5 talks with highest view counts (>50 views).

In [13]:
run([{
  "FindEntity": {
    "with_class": "Talk",
    "constraints": { "yt_views": [">", 50] },
    "sort": { "key": "yt_views", "order": "descending" },
    "limit": 5,
    "results": { "list": ["talk_title","yt_views","youtube_url","yt_published_at"] }
  }
}])


[
    {
        "FindEntity": {
            "entities": [
                {
                    "talk_title": "BloombergGPT: How we built a 50 billion parameter financial language model",
                    "youtube_url": "https://www.youtube.com/watch?v=m2Scj2SO85Y",
                    "yt_published_at": {
                        "_date": "2023-08-17T00:00:00+00:00"
                    },
                    "yt_views": 127705
                },
                {
                    "talk_title": "Quantifying the uncertainty in model predictions",
                    "youtube_url": "https://www.youtube.com/watch?v=-K8vDIyT3xY",
                    "yt_published_at": {
                        "_date": "2023-08-17T00:00:00+00:00"
                    },
                    "yt_views": 4364
                },
                {
                    "talk_title": "Building a Measurement System for Personalization: A Bayesian Approach",
                    "youtube_url": "https://www.youtub

[{'FindEntity': {'entities': [{'talk_title': 'BloombergGPT: How we built a 50 billion parameter financial language model',
     'youtube_url': 'https://www.youtube.com/watch?v=m2Scj2SO85Y',
     'yt_published_at': {'_date': '2023-08-17T00:00:00+00:00'},
     'yt_views': 127705},
    {'talk_title': 'Quantifying the uncertainty in model predictions',
     'youtube_url': 'https://www.youtube.com/watch?v=-K8vDIyT3xY',
     'yt_published_at': {'_date': '2023-08-17T00:00:00+00:00'},
     'yt_views': 4364},
    {'talk_title': 'Building a Measurement System for Personalization: A Bayesian Approach',
     'youtube_url': 'https://www.youtube.com/watch?v=2DZogx96aR4',
     'yt_published_at': {'_date': '2023-08-17T00:00:00+00:00'},
     'yt_views': 3755},
    {'talk_title': 'Agentic AI: Unlocking Emergent Behavior in LLMs for Adaptive Workflow Automation',
     'youtube_url': 'https://www.youtube.com/watch?v=GwQi33fmexU',
     'yt_published_at': {'_date': '2024-10-31T00:00:00+00:00'},
     'yt_vie

### Query 4: Multi-Criteria Filtering

Filter talks by category, track, event, technical level, and date range.

In [18]:
run([{
  "FindEntity": {
    "with_class": "Talk",
    "constraints": {
      "category_primary": ["==", "CATEGORY"],
      "track": ["==", "TRACK"],
      "event_name": ["==", "EVENT"],
      "tech_level": [">=", 1, "<=", 10],
      "yt_published_at": [
        ">=", {"_date":"2023-08-17"},
        "<=", {"_date":"2024-12-17"}
      ]
    },
    "results": { "list": ["talk_id","talk_title","speaker_name","event_name","tech_level","youtube_url"] },
    "limit": 3
  }
}]
)

[
    {
        "FindEntity": {
            "returned": 0,
            "status": 0
        }
    }
]


[{'FindEntity': {'returned': 0, 'status': 0}}]

## Relationship-Based Queries

### Query 5: Talks by Speaker

Find all talks by a specific speaker (Eric Hart) using graph relationships.

In [19]:
run([
  { "FindEntity": {
      "_ref": 1,
      "with_class": "Person",
      "unique": True,
      "constraints": { "name": ["==", "Eric Hart"] }
  }},
  { "FindEntity": {
      "with_class": "Talk",
      "is_connected_to": {
        "ref": 1, "direction": "in", "connection_class": "TalkHasSpeaker"
      },
      "sort": { "key": "yt_published_at", "order": "descending" },
      "results": { "list": ["talk_id","talk_title","event_name","yt_published_at","youtube_url"] },
      "limit": 5
  }}
]
)

[
    {
        "FindEntity": {
            "returned": 0,
            "status": 0
        }
    },
    {
        "FindEntity": {
            "entities": [
                {
                    "event_name": "TMLS 2022",
                    "talk_id": "ba3079f0-0e49-5b76-bf21-42c8ab5e8f04",
                    "talk_title": "Optimal Beer Pricing: An Optimization Layer for Price Elasticities",
                    "youtube_url": "https://www.youtube.com/watch?v=5J8g-EHbDSc",
                    "yt_published_at": {
                        "_date": "2023-08-17T00:00:00+00:00"
                    }
                }
            ],
            "returned": 1,
            "status": 0
        }
    }
]


[{'FindEntity': {'returned': 0, 'status': 0}},
 {'FindEntity': {'entities': [{'event_name': 'TMLS 2022',
     'talk_id': 'ba3079f0-0e49-5b76-bf21-42c8ab5e8f04',
     'talk_title': 'Optimal Beer Pricing: An Optimization Layer for Price Elasticities',
     'youtube_url': 'https://www.youtube.com/watch?v=5J8g-EHbDSc',
     'yt_published_at': {'_date': '2023-08-17T00:00:00+00:00'}}],
   'returned': 1,
   'status': 0}}]

### Query 6: Count Speaker's Talks

Count total number of talks by a specific speaker.

In [21]:
run([
  { "FindEntity": {
      "_ref": 1,
      "with_class": "Person",
      "unique": True,
      "constraints": { "name": ["==", "Eric Hart"] }
  }},
  { "FindEntity": {
      "with_class": "Talk",
      "is_connected_to": { "ref": 1, "direction": "in", "connection_class": "TalkHasSpeaker" },
      "results": { "count": True }
  }}
])


[
    {
        "FindEntity": {
            "returned": 0,
            "status": 0
        }
    },
    {
        "FindEntity": {
            "count": 1,
            "returned": 0,
            "status": 0
        }
    }
]


[{'FindEntity': {'returned': 0, 'status': 0}},
 {'FindEntity': {'count': 1, 'returned': 0, 'status': 0}}]

### Query 7: Talks by Company

Find talks given by speakers from a specific company (Google).

In [23]:
run([{
  "FindEntity": {
    "with_class": "Talk",
    "constraints": { "company_name": ["==", "Google"] },
    "results": { "list": ["talk_title","speaker_name","event_name","youtube_url"] },
    "limit": 3
  }
}]
)

[
    {
        "FindEntity": {
            "entities": [
                {
                    "event_name": "MLOps & GenAI World 2024",
                    "speaker_name": "Nathan Beach",
                    "talk_title": "Leverage Kubernetes To Optimize the Utilization of Your AI Accelerators",
                    "youtube_url": "https://www.youtube.com/watch?v=5jdZksHaJ_Q"
                },
                {
                    "event_name": "MLOps & GenAI World 2024",
                    "speaker_name": "Anu Reddy",
                    "talk_title": "Optimizing AI/ML Workflows on Kubernetes: Advanced Techniques and Integration",
                    "youtube_url": "https://www.youtube.com/watch?v=grCvM9tkS7Q"
                },
                {
                    "event_name": "TMLS 2024",
                    "speaker_name": "Patrick Marlow",
                    "talk_title": "Agentic AI: Unlocking Emergent Behavior in LLMs for Adaptive Workflow Automation",
                    

[{'FindEntity': {'entities': [{'event_name': 'MLOps & GenAI World 2024',
     'speaker_name': 'Nathan Beach',
     'talk_title': 'Leverage Kubernetes To Optimize the Utilization of Your AI Accelerators',
     'youtube_url': 'https://www.youtube.com/watch?v=5jdZksHaJ_Q'},
    {'event_name': 'MLOps & GenAI World 2024',
     'speaker_name': 'Anu Reddy',
     'talk_title': 'Optimizing AI/ML Workflows on Kubernetes: Advanced Techniques and Integration',
     'youtube_url': 'https://www.youtube.com/watch?v=grCvM9tkS7Q'},
    {'event_name': 'TMLS 2024',
     'speaker_name': 'Patrick Marlow',
     'talk_title': 'Agentic AI: Unlocking Emergent Behavior in LLMs for Adaptive Workflow Automation',
     'youtube_url': 'https://www.youtube.com/watch?v=GwQi33fmexU'}],
   'returned': 3,
   'status': 0}}]

## Semantic Search Queries

### Query 8: Transcript Semantic Search

Search transcript chunks for talks discussing "AI agents" using K-NN.

In [24]:
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("google/embeddinggemma-300m")
qvec = model.encode(["query: Which talks referred to AI agents?"], normalize_embeddings=True)[0].astype("<f4")

q = [
  { "FindDescriptor": {
      "_ref": 1,
      "set": "ds_transcript_chunks_v1",
      "k_neighbors": 5,
      "results": { "list": ["_distance","chunk_id","talk_id","seq","start_sec","end_sec","chunk_text"] }
  }},
  { "FindEntity": {
      "with_class": "Talk",
      "is_connected_to": { "ref": 1, "connection_class": "TalkHasTranscriptChunk" },
      "results": { "list": ["talk_id","talk_title","speaker_name","youtube_url"] }
  }}
]

resp, _ = client.query(q, blobs=[qvec.tobytes()])  # vector must be supplied as a blob
print(resp)


[{'FindDescriptor': {'entities': [{'_distance': None, 'chunk_id': '4b5ed1f3-6dc6-5b0f-9dfb-03f9e027f218#ch0015', 'chunk_text': 'engineering so in conclusion AI tools are transformative but they need proper oversight to avoid chaos while AI might not be perfect with a correct security measures in place like aore guardrails you can confidently harness its power and enjoy the wonders of AI thank you', 'end_sec': 282, 'seq': 15, 'start_sec': 282, 'talk_id': '4b5ed1f3-6dc6-5b0f-9dfb-03f9e027f218'}, {'_distance': None, 'chunk_id': '14b1947c-c2ac-5777-8a3b-4593460325f7#ch0001', 'chunk_text': "[Applause] [Music] yep thanks everyone so nice to meet you Michael triffle here with Rea AI um so to rea the future of AI is multimodal so our mission is to develop Next Generation AI to empower the most capable agents that can see hear as well as speak we'll play a little", 'end_sec': 38, 'seq': 1, 'start_sec': 0, 'talk_id': '14b1947c-c2ac-5777-8a3b-4593460325f7'}, {'_distance': None, 'chunk_id': 'c27e4

### Query 9: Semantic Search Within Talk Time Range

Search for "Generative AI" mentions in a specific talk's first 5 minutes.

In [26]:
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("google/embeddinggemma-300m")
qvec = model.encode(["query: Was Generative AI mentioned?"], normalize_embeddings=True)[0].astype("<f4")

q = [
  { "FindEntity": {
      "_ref": 1,
      "with_class": "Talk",
      "unique": True,
      "constraints": { "talk_title": ["==", "GenAI: A New Renaissance in Product Development"] },
      "results": { "list": ["talk_id"] }
  }},
  { "FindDescriptor": {
      "set": "ds_transcript_chunks_v1",
      "is_connected_to": { "ref": 1, "connection_class": "TalkHasTranscriptChunk" },
      "constraints": { "start_sec": [">=", 0], "end_sec": ["<=", 300] },
      "k_neighbors": 5,
      "results": { "list": ["_distance","seq","chunk_id","start_sec","end_sec","chunk_text"] }
  }}
]


resp, _ = client.query(q, blobs=[qvec.tobytes()])  # vector must be supplied as a blob
print(resp)


[{'FindEntity': {'entities': [{'talk_id': 'b0064f83-d89c-53d1-b6a7-73ced78a937a'}], 'returned': 1, 'status': 0}}, {'FindDescriptor': {'entities': [{'_distance': None, 'chunk_id': 'b0064f83-d89c-53d1-b6a7-73ced78a937a#ch0012', 'chunk_text': "it's very helpful for Innovation uh so throughout this talk I have asked chat GPT to come up with a bunch of random uh activities that will take three to 5 seconds each I then ask chat GPT to uh pick random locations in my slides to drop them in so I don't even know where they're really going to pop up you will see them flash up or we'll both see them flash up with large kind of salmon", 'end_sec': 235, 'seq': 12, 'start_sec': 212}, {'_distance': None, 'chunk_id': 'b0064f83-d89c-53d1-b6a7-73ced78a937a#ch0006', 'chunk_text': "today I consider myself an Explorer I'm going to start off by maybe just laying the groundwork in terms of why I think uh gener AI is transformative I'm then going to talk about different use cases uh for using it with global te

### Query 10: Filter + Semantic Search (Event & Category)

Combine event/category filters with semantic search on transcripts.

In [27]:
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("google/embeddinggemma-300m")
qvec = model.encode(["query: Was Generative AI mentioned?"], normalize_embeddings=True)[0].astype("<f4")

q = [
  { "FindEntity": {
      "_ref": 1,
      "with_class": "Talk",
      "constraints": {
        "event_name": ["==", "MLOps & GenAI World 2024"],
        "yt_published_at": [
          ">=", {"_date":"2023"},
          "<=", {"_date":"2024"}
        ],
        "category_primary": ["==", "Model dev, training, arch."]
      },
      "results": { "list": ["talk_id"] }
  }},
  { "FindDescriptor": {
      "set": "ds_transcript_chunks_v1",
      "is_connected_to": { "ref": 1, "connection_class": "TalkHasTranscriptChunk" },
      "k_neighbors": 5,
      "results": { "list": ["_distance","chunk_id","talk_id","seq","start_sec","end_sec","chunk_text"] }
  }}
]


resp, _ = client.query(q, blobs=[qvec.tobytes()])  # vector must be supplied as a blob
print(resp)


{'info': "JSON Command 1: [FindEntity.constraints.yt_published_at]: invalid predicate specification: invalid formatted date '2023'", 'status': -1}


### Query 11: Same with Corrected Date Format

Re-run the previous query with proper date formatting.

In [28]:
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("google/embeddinggemma-300m")
qvec = model.encode(["query: Was Generative AI mentioned?"], normalize_embeddings=True)[0].astype("<f4")

q = [
  { "FindEntity": {
      "_ref": 1,
      "with_class": "Talk",
      "constraints": {
        "event_name": ["==", "MLOps & GenAI World 2024"],
        "yt_published_at": [
          ">=", {"_date":"2023-01-01"},
          "<=", {"_date":"2024-12-31"}
        ],
        "category_primary": ["==", "Model dev, training, arch."]
      },
      "results": { "list": ["talk_id"] }
  }},
  { "FindDescriptor": {
      "set": "ds_transcript_chunks_v1",
      "is_connected_to": { "ref": 1, "connection_class": "TalkHasTranscriptChunk" },
      "k_neighbors": 5,
      "results": { "list": ["_distance","chunk_id","talk_id","seq","start_sec","end_sec","chunk_text"] }
  }}
]


resp, _ = client.query(q, blobs=[qvec.tobytes()])  # vector must be supplied as a blob
print(resp)


[{'FindEntity': {'entities': [{'talk_id': '3df9ae95-3c92-55a2-8c2f-3513eb3030f5'}, {'talk_id': 'f4705fdf-cadd-55ad-ae5c-c8c1214fd6a8'}, {'talk_id': '57c5c0a0-9feb-58cb-a8db-21537846b7c4'}, {'talk_id': 'cfb5afe0-dd5b-5e7c-9812-e58a2507be68'}, {'talk_id': 'e3041800-aaf9-5305-a2e2-824a41b6bfbd'}, {'talk_id': 'c91166e2-983a-5723-a7b3-711041269ef6'}, {'talk_id': '9b337878-a714-5f62-9acd-6c6e917eeccb'}, {'talk_id': '0dd93bdd-5233-5a99-9518-af3b1d80e472'}, {'talk_id': '8bcffc59-a3a2-5670-b7a7-230bd8f5c149'}, {'talk_id': '54a66ba3-4f1c-5608-ab70-481249f5c5d3'}, {'talk_id': '2ae81af2-ac61-54d8-b44a-3226ac52013c'}, {'talk_id': 'a150b954-a5b9-5a51-894a-c565c22d3876'}, {'talk_id': '54df2d6c-51a1-5cf2-907d-52df2b14368f'}, {'talk_id': '79acc363-6a78-5405-9cc2-834aa9f56c60'}, {'talk_id': 'df52f16e-7e4c-56b8-a5b7-215b1ef25406'}], 'returned': 15, 'status': 0}}, {'FindDescriptor': {'returned': 0, 'status': 0}}]


### Query 12: Metadata Semantic Search

Search talk metadata (abstract, keywords) for relevant content about "AI agents".

In [29]:
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("google/embeddinggemma-300m")
qvec = model.encode(["query: AI agents"], normalize_embeddings=True)[0].astype("<f4")

q = [
  { "FindDescriptor": {
      "_ref": 1,
      "set": "ds_talk_meta_v1",
      "k_neighbors": 5,
      "results": { "list": ["_distance","descriptor_uid","talk_id","meta_text"] }
  }},
  { "FindEntity": {
      "with_class": "Talk",
      "is_connected_to": { "ref": 1, "connection_class": "TalkHasMeta" },
      "results": { "list": ["talk_title","speaker_name","youtube_url","event_name"] }
  }}
]



resp, _ = client.query(q, blobs=[qvec.tobytes()])  # vector must be supplied as a blob
print(resp)


[{'FindDescriptor': {'entities': [{'_distance': None, 'descriptor_uid': 'df7ba3f6-88d0-5225-96e9-4d66d969b090::ds_talk_meta_v1', 'meta_text': 'Panel: Achieving Long-Term AI Growth: Real Problem Solving vs. Trend-Based Solutions | Future trends | Long-term growth, AI problem-solving, Trend-based solutions', 'talk_id': 'df7ba3f6-88d0-5225-96e9-4d66d969b090'}, {'_distance': None, 'descriptor_uid': '7255dc0f-0f29-5aad-9a95-a7956c44673a::ds_talk_meta_v1', 'meta_text': 'Panel: How companies can navigate and approach the new advancements in generative AI | Business and stakeholder alignment | Generative AI, Business Strategy, Panel Discussion', 'talk_id': '7255dc0f-0f29-5aad-9a95-a7956c44673a'}, {'_distance': None, 'descriptor_uid': '6b8d79ff-1417-57e1-9263-12d335800919::ds_talk_meta_v1', 'meta_text': 'Optimized AI Deployment Platform | Showcasing CentMLs ability to streamline the process of deploying and optimizing LLMs in production. | Deployment and integration | CentML, LLM deployment, Op

### Query 13: Filter + Metadata Search

Combine event/date filters with semantic search on talk metadata.

In [30]:
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("google/embeddinggemma-300m")
qvec = model.encode(["query: AI agents"], normalize_embeddings=True)[0].astype("<f4")

q = [
  { "FindEntity": {
      "_ref": 1,
      "with_class": "Talk",
      "constraints": {
        "event_name": ["==", "MLOps & GenAI World 2024"],
        "yt_published_at": [
          ">=", {"_date":"2023-01-01"},
          "<=", {"_date":"2024-12-31"}
        ]
      },
      "results": { "list": ["talk_id"] }
  }},
  { "FindDescriptor": {
      "set": "ds_talk_meta_v1",
      "is_connected_to": { "ref": 1, "connection_class": "TalkHasMeta" },
      "k_neighbors": 5,
      "results": { "list": ["_distance","descriptor_uid","talk_id","meta_text"] }
  }}
]




resp, _ = client.query(q, blobs=[qvec.tobytes()])  # vector must be supplied as a blob
print(resp)


[{'FindEntity': {'entities': [{'talk_id': 'aa56daa2-2e78-5292-9fa9-bfa8cb22ce66'}, {'talk_id': '02f5e445-8821-5cb1-bada-88586b0f9ac8'}, {'talk_id': '3df9ae95-3c92-55a2-8c2f-3513eb3030f5'}, {'talk_id': '4636151c-7435-551f-95b6-cd3e67da828f'}, {'talk_id': 'f4705fdf-cadd-55ad-ae5c-c8c1214fd6a8'}, {'talk_id': '5ec5c69a-ad4d-5105-9a02-b9d147b0e812'}, {'talk_id': '57c5c0a0-9feb-58cb-a8db-21537846b7c4'}, {'talk_id': '6b8d79ff-1417-57e1-9263-12d335800919'}, {'talk_id': 'e2da4021-9d57-5dc4-8e2a-9f37948d1fd5'}, {'talk_id': '42849430-6a9e-51cd-8a60-9506b1664a96'}, {'talk_id': 'cfb5afe0-dd5b-5e7c-9812-e58a2507be68'}, {'talk_id': 'e3041800-aaf9-5305-a2e2-824a41b6bfbd'}, {'talk_id': '16bbe03e-eb37-5099-860f-c3069a336c21'}, {'talk_id': '69b5960c-0196-5f01-ae3d-412ae75be318'}, {'talk_id': 'c91166e2-983a-5723-a7b3-711041269ef6'}, {'talk_id': '9ce676d6-d415-501a-896f-a7ac09ecce6f'}, {'talk_id': '8e6f8334-380c-5a09-8662-9826bda35901'}, {'talk_id': 'fc3efe32-e399-5dfc-9cd5-70912f545122'}, {'talk_id': '4f8

### Query 14: Speaker Bio Search

Search speaker bios for "Machine Learning Engineer" role.

In [31]:
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("google/embeddinggemma-300m")
qvec = model.encode(["query: Machine Learning Engineer"], normalize_embeddings=True)[0].astype("<f4")

q = [
  { "FindDescriptor": {
      "_ref": 1,
      "set": "ds_speaker_bio_v1",
      "k_neighbors": 5,
      "results": { "list": ["_distance","descriptor_uid","talk_id","bio_text"] }
  }},
  { "FindEntity": {
      "with_class": "Talk",
      "is_connected_to": { "ref": 1, "connection_class": "TalkHasSpeakerBio" },
      "results": { "list": ["talk_title","speaker_name","company_name","youtube_url"] }
  }}
]





resp, _ = client.query(q, blobs=[qvec.tobytes()])  # vector must be supplied as a blob
print(resp)


[{'FindDescriptor': {'entities': [{'_distance': None, 'bio_text': 'Member of Technical Staff | Work on LLMs, pretraining, long-context and deep-learning engineering.', 'descriptor_uid': '8a1b1e9a-6b99-5282-9032-61bf3da02ccb::ds_speaker_bio_v1', 'talk_id': '8a1b1e9a-6b99-5282-9032-61bf3da02ccb'}, {'_distance': None, 'bio_text': 'Senior Data Scientist | Vatsal Patel, Senior Data Scientist, MongoDB', 'descriptor_uid': '85361907-d39c-5970-ab4e-2c7b0b9d9da9::ds_speaker_bio_v1', 'talk_id': '85361907-d39c-5970-ab4e-2c7b0b9d9da9'}, {'_distance': None, 'bio_text': 'Technical Founder | Co-Founder @ WhyHow.AI', 'descriptor_uid': 'a55d2cac-4def-52a7-9232-d1e28fea0aa7::ds_speaker_bio_v1', 'talk_id': 'a55d2cac-4def-52a7-9232-d1e28fea0aa7'}, {'_distance': None, 'bio_text': 'Developer Advocate | I’m Aniket, a Machine Learning - Software Engineer with with over 4 years of experience, demonstrating a strong track record in developing and deploying machine learning models to production.', 'descriptor_uid

## Advanced Patterns

### Query 15: Grouped Chunks by Talk

Retrieve first 10 chunks for multiple talks, grouped by source.

In [33]:
run([
  { "FindEntity": {
      "_ref": 1,
      "with_class": "Talk",
      "constraints": { "talk_title": ["in", ["GenAI: A New Renaissance in Product Development","LLMs, Big Data, and Audio: Breaching an Untapped Gold Mine","Toyota's Generative AI Journey"]] },
      "results": { "list": ["talk_title","talk_id"] }
  }},
  { "FindDescriptor": {
      "set": "ds_transcript_chunks_v1",
      "is_connected_to": { "ref": 1, "connection_class": "TalkHasTranscriptChunk" },
      "group_by_source": True,
      "sort": "seq",
      "limit": 10,
      "results": { "list": ["seq","chunk_id","start_sec","end_sec"] }
  }}
])

[
    {
        "FindEntity": {
            "entities": [
                {
                    "talk_id": "b0064f83-d89c-53d1-b6a7-73ced78a937a",
                    "talk_title": "GenAI: A New Renaissance in Product Development"
                },
                {
                    "talk_id": "95c82848-d9f9-54c3-a7eb-2f3fed38adac",
                    "talk_title": "LLMs, Big Data, and Audio: Breaching an Untapped Gold Mine"
                },
                {
                    "talk_id": "57c5c0a0-9feb-58cb-a8db-21537846b7c4",
                    "talk_title": "Toyota's Generative AI Journey"
                }
            ],
            "returned": 3,
            "status": 0
        }
    },
    {
        "FindDescriptor": {
            "entities": {
                "7.11.260": [
                    {
                        "chunk_id": "b0064f83-d89c-53d1-b6a7-73ced78a937a#ch0001",
                        "end_sec": 37,
                        "seq": 1,
                      

[{'FindEntity': {'entities': [{'talk_id': 'b0064f83-d89c-53d1-b6a7-73ced78a937a',
     'talk_title': 'GenAI: A New Renaissance in Product Development'},
    {'talk_id': '95c82848-d9f9-54c3-a7eb-2f3fed38adac',
     'talk_title': 'LLMs, Big Data, and Audio: Breaching an Untapped Gold Mine'},
    {'talk_id': '57c5c0a0-9feb-58cb-a8db-21537846b7c4',
     'talk_title': "Toyota's Generative AI Journey"}],
   'returned': 3,
   'status': 0}},
 {'FindDescriptor': {'entities': {'7.11.260': [{'chunk_id': 'b0064f83-d89c-53d1-b6a7-73ced78a937a#ch0001',
      'end_sec': 37,
      'seq': 1,
      'start_sec': 2},
     {'chunk_id': 'b0064f83-d89c-53d1-b6a7-73ced78a937a#ch0002',
      'end_sec': 60,
      'seq': 2,
      'start_sec': 31},
     {'chunk_id': 'b0064f83-d89c-53d1-b6a7-73ced78a937a#ch0003',
      'end_sec': 78,
      'seq': 3,
      'start_sec': 51},
     {'chunk_id': 'b0064f83-d89c-53d1-b6a7-73ced78a937a#ch0004',
      'end_sec': 92,
      'seq': 4,
      'start_sec': 73}],
    '7.12.260': 

### Query 16: Find Similar Talks (Multi-Hop)

Use a talk's metadata embedding to find similar talks via K-NN.

In [37]:
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("google/embeddinggemma-300m")
qvec = model.encode(["query: Machine Learning"], normalize_embeddings=True)[0].astype("<f4")

q = [
  { "FindEntity": {
      "_ref": 1,
      "with_class": "Talk",
      "unique": True,
      "constraints": { "talk_title": ["==", "Toyota's Generative AI Journey"] },
      "results": { "list": ["talk_id"] }
  }},
  { "FindDescriptor": {
      "_ref": 2,
      "set": "ds_talk_meta_v1",
      "is_connected_to": { "ref": 1, "connection_class": "TalkHasMeta" },
      "k_neighbors": 1,
      "results": { "all_prop": ["descriptor_uid"] }
  }},
  { "FindDescriptor": {
      "_ref": 3,
      "set": "ds_talk_meta_v1",
      "k_neighbors": 3,
      "results": { "list": ["_distance","descriptor_uid","talk_id","meta_text"] }
  }},
  { "FindEntity": {
      "with_class": "Talk",
      "is_connected_to": { "ref": 3, "connection_class": "TalkHasMeta" },
      "results": { "list": ["talk_title","speaker_name","youtube_url"] }
  }}
]






resp, _ = client.query(q, blobs=[qvec.tobytes()])  # vector must be supplied as a blob
print(resp)


{'info': 'Expected blobs: 2. Received blobs: 1', 'status': -1}


### Query 17: Search Early Chunks Only

Search for "Machine Learning" mentions only in early chunks (seq ≤ 120).

In [38]:
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("google/embeddinggemma-300m")
qvec = model.encode(["query: Machine Learning"], normalize_embeddings=True)[0].astype("<f4")

q = [
  { "FindDescriptor": {
      "_ref": 1,
      "set": "ds_transcript_chunks_v1",
      "constraints": { "seq": ["<=", 120] },
      "k_neighbors": 5,
      "results": { "list": ["_distance","chunk_id","talk_id","seq","chunk_text"] }
  }},
  { "FindEntity": {
      "with_class": "Talk",
      "is_connected_to": { "ref": 1, "connection_class": "TalkHasTranscriptChunk" },
      "results": { "list": ["talk_title","speaker_name","youtube_url"] }
  }}
]



resp, _ = client.query(q, blobs=[qvec.tobytes()])  # vector must be supplied as a blob
print(resp)


[{'FindDescriptor': {'entities': [{'_distance': None, 'chunk_id': 'cd9725e0-8921-53ee-8df8-1bdfc1476d5a#ch0014', 'chunk_text': "just machine learning like it's just not good old fashioned AI but you know data data focused AI but really what we're thinking about here is um a different approach to doing machine learning in general So In traditional machine learning like the data scientist thinks they have to manually you know do all the exploratory data analysis do all the work to discover the data issues fix", 'seq': 14, 'talk_id': 'cd9725e0-8921-53ee-8df8-1bdfc1476d5a'}, {'_distance': None, 'chunk_id': 'cd9725e0-8921-53ee-8df8-1bdfc1476d5a#ch0023', 'chunk_text': 'already trained and it works with pretty much any kind of standard machine learning model and really just consumes essentially uh the information learned by this model so that could be the uh feature representations like embeddings of your data as well as probabilistic predictions about your data from the trained model and so 

### Query 18: Compare ML vs GenAI Topics

Search metadata embeddings for comparative analysis of ML and GenAI topics.

In [39]:
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("google/embeddinggemma-300m")
qvec = model.encode(["query: Compare Machine Learning vs Generative AI"], normalize_embeddings=True)[0].astype("<f4")

q = [
  { "FindDescriptor": {
      "_ref": 1,
      "set": "ds_talk_meta_v1",
      "k_neighbors": 5,
      "results": { "list": ["_distance","descriptor_uid","talk_id","meta_text"] }
  }},
  { "FindEntity": {
      "with_class": "Talk",
      "is_connected_to": { "ref": 1, "connection_class": "TalkHasMeta" },
      "results": { "list": ["talk_title","speaker_name","youtube_url"] }
  }}
]



resp, _ = client.query(q, blobs=[qvec.tobytes()])  # vector must be supplied as a blob
print(resp)


[{'FindDescriptor': {'entities': [{'_distance': None, 'descriptor_uid': '6b8d79ff-1417-57e1-9263-12d335800919::ds_talk_meta_v1', 'meta_text': 'Optimized AI Deployment Platform | Showcasing CentMLs ability to streamline the process of deploying and optimizing LLMs in production. | Deployment and integration | CentML, LLM deployment, Optimization', 'talk_id': '6b8d79ff-1417-57e1-9263-12d335800919'}, {'_distance': None, 'descriptor_uid': 'df7ba3f6-88d0-5225-96e9-4d66d969b090::ds_talk_meta_v1', 'meta_text': 'Panel: Achieving Long-Term AI Growth: Real Problem Solving vs. Trend-Based Solutions | Future trends | Long-term growth, AI problem-solving, Trend-based solutions', 'talk_id': 'df7ba3f6-88d0-5225-96e9-4d66d969b090'}, {'_distance': None, 'descriptor_uid': '4e0c7f79-d08a-5eed-8500-eda5faa5ec2f::ds_talk_meta_v1', 'meta_text': 'Panel: The Current Investment Landscape. Opportunities & Challenges in ML/Gen AI | Business and stakeholder alignment | Investment landscape, Opportunities, ML/GenA