# What is HNSW?

### An Investigation into Document Relevance Labeling

How many of the 500-token chunks from Weaviate's blog posts are relevant to the question "What is HNSW?"

As judged by the Transformation Agent... 138!

In [2]:
import os
import time

import weaviate

# Connect to the Weaviate Cloud cluster using credentials from the environment.
# Indexing os.environ (instead of os.getenv) raises a KeyError that names the
# missing variable, rather than silently handing None to the client.
weaviate_client = weaviate.connect_to_weaviate_cloud(
    cluster_url=os.environ["WEAVIATE_URL"],
    auth_credentials=weaviate.auth.AuthApiKey(os.environ["WEAVIATE_API_KEY"]),
)

In [3]:
# Handle to the "Blogs" collection; the aggregate query further down runs on it.
blogs_collection_name = "Blogs"
weaviate_blogs = weaviate_client.collections.get(blogs_collection_name)

In [7]:
from weaviate.agents.transformation import TransformationAgent
from weaviate.agents.classes import Operations
from weaviate.collections.classes.config import DataType

query = "What is HNSW?"

# Prompt given to the agent; the query text is interpolated into it.
relevance_instruction = (
    f"Assess if this document is relevant to the query: {query}.\n"
    "    Return True if the document is relevant to the query, False otherwise."
)

# Operation that appends a boolean `relevance_assessment` property, with
# `content` listed as the property the agent may view when filling it in.
add_query_relevance = Operations.append_property(
    instruction=relevance_instruction,
    view_properties=["content"],
    data_type=DataType.BOOL,
    property_name="relevance_assessment",
)

agent = TransformationAgent(
    operations=[add_query_relevance],
    collection="Blogs",
    client=weaviate_client,
)

# Kick off the transformation workflow. The returned object (a
# TransformationResponse) carries the `workflow_id` used for status checks.
response = agent.update_all()

# Last expression of the cell: display the workflow's current status.
agent.get_status(workflow_id=response.workflow_id)

{'workflow_id': 'TransformationWorkflow-56c75ebf02bcde53a3dc7bf539912d82',
 'status': {'batch_count': 0,
  'end_time': None,
  'start_time': '2025-05-22 23:12:00',
  'state': 'running',
  'total_duration': None,
  'total_items': 0}}

In [8]:
# Wait for the transformation workflow to leave the "running" state before
# moving on. A single status check only works when this cell is re-run by hand;
# under Restart Kernel -> Run All the aggregate query below would otherwise
# execute while the workflow is still in progress.
POLL_INTERVAL_SECONDS = 10
MAX_WAIT_SECONDS = 30 * 60  # stop polling eventually instead of blocking forever

poll_deadline = time.monotonic() + MAX_WAIT_SECONDS
workflow_status = agent.get_status(workflow_id=response.workflow_id)
while (
    workflow_status["status"]["state"] == "running"
    and time.monotonic() < poll_deadline
):
    time.sleep(POLL_INTERVAL_SECONDS)
    workflow_status = agent.get_status(workflow_id=response.workflow_id)

# Display the last status that was fetched (same shape as a direct get_status call).
workflow_status

{'workflow_id': 'TransformationWorkflow-56c75ebf02bcde53a3dc7bf539912d82',
 'status': {'batch_count': 6,
  'end_time': '2025-05-22 23:14:19',
  'start_time': '2025-05-22 23:12:00',
  'state': 'completed',
  'total_duration': 139.53496,
  'total_items': 1463}}

In [11]:
# Count the objects whose `relevance_assessment` property is True.
# The result is stored under its own name so that `response` (returned by
# `agent.update_all()` and read by the status cells via `.workflow_id`) is not
# overwritten; re-running a status cell after this one would otherwise fail.
relevance_metrics = weaviate.classes.query.Metrics("relevance_assessment").boolean(
    total_true=True
)
relevance_aggregate = weaviate_blogs.aggregate.over_all(
    return_metrics=relevance_metrics
)
print(relevance_aggregate.properties["relevance_assessment"].total_true)

138
