[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/weaviate/recipes/blob/main/weaviate-features/reranking/voyageai-ranking/simple-reranking/voyageai-ranking.ipynb)

## Dependencies

In [None]:
!pip install weaviate-client

## Configuration

In [None]:
import os

import weaviate

# The Voyage AI API key is read from the VOYAGEAI_API_KEY environment variable.
# Set that variable before running this cell; indexing os.environ raises
# KeyError if it is missing.
voyageai_key = os.environ["VOYAGEAI_API_KEY"]

# Option 1: Connect to WCS cluster
# client = weaviate.connect_to_wcs(
#     cluster_url=os.getenv("WCS_DEMO_URL"),  # Replace with your WCS URL
#     auth_credentials=weaviate.auth.AuthApiKey(os.getenv("WCS_DEMO_KEY")),  # Replace with your WCS key
#     headers={"X-VoyageAI-Api-Key": voyageai_key}
# )

# Option 2: Connect to Weaviate Embedded
# The Voyage key is passed as a request header so it is sent along with
# requests made through this client.
client = weaviate.connect_to_embedded(
    version="1.24.8",
    headers={"X-VoyageAI-Api-Key": voyageai_key},
)

## Create a collection
> A collection stores your data and vector embeddings.

In [None]:
import weaviate.classes.config as wc

# Start from a clean slate: drop the collection if a previous run created it
if client.collections.exists("BlogPost"):
    client.collections.delete("BlogPost")

client.collections.create(
    name="BlogPost",
    # Configure the vectorizer: Voyage AI embeddings
    vectorizer_config=wc.Configure.Vectorizer.text2vec_voyageai(
        model="voyage-large-2",  # Voyage AI embedding model used to vectorize objects
        truncate=True,  # boolean flag; presumably truncates over-long inputs - confirm in the module docs
    ),

    # Configure the reranker here
    reranker_config=wc.Configure.Reranker.voyageai(
        model="rerank-lite-1"
    ),

    # Defining properties (data schema) is optional. The names below match the
    # keys of the objects uploaded later in this notebook ("content" is also the
    # property the reranker is pointed at in the query cell).
    properties=[
        wc.Property(name="content", data_type=wc.DataType.TEXT),
        wc.Property(name="source", data_type=wc.DataType.TEXT),
    ],
)

print("Successfully created collection: BlogPost.")

## Upload Data

In [None]:
# Paths of the blog posts to index
blog_paths = ['./ranking-models.mdx', './ref2vec-centroid.mdx']

# Maps each file path to the full text of that file
data = {}

# Loop through each file path and read the file.
# The encoding is given explicitly so the result does not depend on the
# platform's default locale encoding.
for blog in blog_paths:
    with open(blog, 'r', encoding='utf-8') as file:
        data[blog] = file.read()

Manually split each document into fixed-size chunks of 500 characters. Because the split ignores sentence and paragraph boundaries, the resulting chunks are a bit messy; this can be improved by using an external tool such as LlamaIndex, Haystack, or LangChain.

In [None]:
# Handle to the "BlogPost" collection that receives the chunks
blogs = client.collections.get("BlogPost")

# Buffer of pending objects; flushed to Weaviate in batches
chunks = []

for source, text in data.items():
    # Slice the document into consecutive 500-character windows
    for start in range(0, len(text), 500):
        chunks.append({
            "source": source,
            "content": text[start:start + 500],
        })

        # Once 100 chunks are buffered, send them and empty the buffer
        if len(chunks) >= 100:
            blogs.data.insert_many(chunks)
            chunks.clear()

# Send whatever is left over after the last full batch
if chunks:
    blogs.data.insert_many(chunks)
    chunks.clear()

## Query

##### Query without reranking

In [None]:
import json

# The collection object from the previous cell could be reused; it is looked
# up again here so this cell does not rely on that variable.
blogs = client.collections.get("BlogPost")

# Plain vector search: the five objects closest to the query text
response = blogs.query.near_text("Low hanging fruit to improve relevance", limit=5)

for obj in response.objects:
    properties_json = json.dumps(obj.properties, indent=2)
    print("ID:", obj.uuid)
    print("Data:", properties_json, "\n")

##### The first few results from the above query aren't exactly what we're looking for. Let's run the query again, but this time rerank the top 5 results using the text in the `content` property.

##### Query with reranking

In [None]:
import json
import weaviate.classes.query as wq

# The collection object from the previous cell could be reused; it is looked
# up again here so this cell does not rely on that variable.
blogs = client.collections.get("BlogPost")

# Same vector search as before, but the results are re-ordered by the
# collection's reranker, which scores each object's "content" property
# against the rerank query.
response = blogs.query.near_text(
    "Low hanging fruit to improve relevance",
    limit=5,
    rerank=wq.Rerank(
        prop="content",
        query="Low hanging fruit",
    ),
    # Request the vector distance so it can be shown next to the rerank score
    return_metadata=wq.MetadataQuery(distance=True),
)

for item in response.objects:
    print("ID:", item.uuid)
    # Print both metrics so the effect of reranking on the order is visible
    print("Distance:", item.metadata.distance)
    print("Rerank score:", item.metadata.rerank_score)
    print("Data:", json.dumps(item.properties, indent=2), "\n")