# Query data

Learn three search methods: **Vector** (semantic similarity), **Keyword** (BM25), and **Hybrid** (combines both).

## Connect to Weaviate

Connect to a Weaviate instance.

In [None]:
# Refresh credentials & load the Weaviate IP
from helpers import update_creds

# update_creds() returns three values, unpacked here into the AWS access key,
# secret key and session token; the connection cell below passes them to
# Weaviate as request headers.
AWS_ACCESS_KEY, AWS_SECRET_KEY, AWS_SESSION_TOKEN = update_creds()

# Restore WEAVIATE_IP from IPython's %store persistence
# (presumably saved with `%store WEAVIATE_IP` in an earlier notebook - verify).
# Keep comments off the magic line itself: trailing text would be passed to %store.
%store -r WEAVIATE_IP

In [None]:
import weaviate

# AWS credentials are handed to the client as extra request headers
aws_headers = {
    "X-AWS-Access-Key": AWS_ACCESS_KEY,
    "X-AWS-Secret-Key": AWS_SECRET_KEY,
    "X-AWS-Session-Token": AWS_SESSION_TOKEN,
}

client = weaviate.connect_to_local(WEAVIATE_IP, headers=aws_headers)

# Last expression of the cell, so the readiness check result is displayed
client.is_ready()

### Helper function

Get our collection of financial articles (5000 articles total).

In [None]:
# STUDENT TODO:
# Instantiate the "FinancialArticles" collection object
# and assign it to a variable named `articles` (the query cells below use that name)
# ADD YOUR CODE HERE

## Filters

Add conditions to narrow down search results before querying.

### Fetch with filters

In [None]:
from weaviate.classes.query import Filter

# STUDENT TODO:
# Perform articles.query.fetch_objects, but filter the results
# to only include articles with "apple" in the title.
# Store the result in `response` (the code below reads it).
# Hint: filters=Filter.by_property(<property_name>).like(<search_pattern>)
# ADD YOUR CODE HERE

# Show how many objects came back, then each article's title
print(f"Returned object count: {len(response.objects)}")

for item in response.objects:
    print(item.properties["article_title"])

In [None]:
from weaviate.classes.query import Filter

# Two `like` conditions on the same property, joined with `&` so that
# both have to hold (`*` in "aust*" is a wildcard - see the Weaviate filter docs)
title_prop = "article_title"
matches_aust = Filter.by_property(title_prop).like("aust*")
matches_ev = Filter.by_property(title_prop).like("ev")

response = articles.query.fetch_objects(
    filters=matches_aust & matches_ev,
    limit=5,
)

returned = response.objects
print(f"Returned object count: {len(returned)}")

for item in returned:
    print(item.properties[title_prop])

## Keyword Search

![images/search_keyword.png](images/search_keyword.png)

[Docs - keyword/bm25](https://weaviate.io/developers/weaviate/search/bm25)

In [None]:
# STUDENT TODO:
# Keyword search for articles about "earnings report"
# Limit the results to 5 articles
# Store the result in `response` (the loop below reads it)
# ADD YOUR CODE HERE

# Print the title of each returned article
for item in response.objects:
    print(item.properties["article_title"])

Retrieve the keyword (BM25) score of each result as well.

In [None]:
from weaviate.classes.query import MetadataQuery

# Keyword (BM25) search on the `article_title` property, additionally
# requesting the score of each result via return_metadata.
response = articles.query.bm25(
    query="earnings report",
    query_properties=["article_title"],
    limit=5,
    return_metadata=MetadataQuery(score=True)
)

for item in response.objects:
    print(item.properties["article_title"])
    # STUDENT TODO - inspect keyword search "score"
    # (requested metadata is read from `item.metadata`, as the later cells do)
    # ADD YOUR CODE HERE

## Vector search

![images/search_vector.png](images/search_vector.png)

[Docs - near_text](https://weaviate.io/developers/weaviate/search/similarity#an-input-medium)

In [None]:
# STUDENT TODO:
# Perform a `near_text` query to find the top 5 articles
# with `title` target vector closest to "tech market trends"
# Store the result in `response` (the loop below reads it)
# ADD YOUR CODE HERE

# Print the title of each returned article
for item in response.objects:
    print(item.properties["article_title"])

In [None]:
from weaviate.classes.query import MetadataQuery

# Request the distance of each result along with its properties
distance_metadata = MetadataQuery(distance=True)

response = articles.query.near_text(
    query="tech market trends",
    target_vector="title",
    limit=5,
    return_metadata=distance_metadata,
)

# For every result: the title on one line, its distance on the next
for item in response.objects:
    print(item.properties["article_title"], item.metadata.distance, sep="\n")

## Search with filters

Combine vector search with filters for precise results.

In [None]:
from weaviate.classes.query import Filter

# Condition on the title that the vector search results must satisfy
netflix_only = Filter.by_property("article_title").like("netflix")

response = articles.query.near_text(
    query="strategy",
    filters=netflix_only,
    target_vector="title",
    limit=5,
)

hits = response.objects
print(f"Returned object count: {len(hits)}")

for item in hits:
    print(item.properties["article_title"])

## Hybrid search

![images/search_hybrid.png](images/search_hybrid.png)

[Docs - hybrid](https://weaviate.io/developers/weaviate/search/hybrid)

In [None]:
# Hybrid query: the same text drives both the keyword part (searched in
# `article_title`) and the vector part (against the `title` target vector).
# `alpha` sets the balance between the two - see the hybrid docs linked above.
response = articles.query.hybrid(
    query="earnings report",
    alpha=0.7,
    target_vector="title",
    query_properties=["article_title"],
    limit=5,
)

for item in response.objects:
    title = item.properties["article_title"]
    print(title)

### Hybrid - Explain score

See how hybrid search combines vector and keyword scores.

In [None]:
from weaviate.classes.query import MetadataQuery

# Request the combined score and its explanation for each result
score_metadata = MetadataQuery(score=True, explain_score=True)

response = articles.query.hybrid(
    query="earnings report",
    alpha=0.7,
    target_vector="title",
    query_properties=["article_title"],
    limit=5,
    return_metadata=score_metadata,
)

# Per result: title, score and score explanation, each on its own line
for item in response.objects:
    print(
        item.properties["article_title"],
        item.metadata.score,
        item.metadata.explain_score,
        sep="\n",
    )

## Close the client

Always close your connection when finished.

In [None]:
# Release the connection opened with weaviate.connect_to_local() earlier in the notebook
client.close()