In [6]:
import os
from dotenv import load_dotenv
import cohere
import weaviate
# ! pip install weaviate-client

In [8]:
# Load environment variables
load_dotenv()

# Initialize Cohere and Weaviate clients
co = cohere.Client(os.environ['COHERE_API_KEY'])

auth_config = weaviate.auth.AuthApiKey(api_key=os.environ['WEAVIATE_API_KEY'])
client = weaviate.Client(
    url=os.environ['WEAVIATE_API_URL'],
    auth_client_secret=auth_config,
    additional_headers={
        "X-Cohere-Api-Key": os.environ['COHERE_API_KEY'],
    }
)

Python client v3 `weaviate.Client(...)` connections and methods are deprecated and will
            be removed by 2024-11-30.

            Upgrade your code to use Python client v4 `weaviate.WeaviateClient` connections and methods.
                - For Python Client v4 usage, see: https://weaviate.io/developers/weaviate/client-libraries/python
                - For code migration, see: https://weaviate.io/developers/weaviate/client-libraries/python/v3_v4_migration

            If you have to use v3 code, install the v3 client and pin the v3 dependency in your requirements file: `weaviate-client>=3.26.7;<4.0.0`
  client = weaviate.Client(


In [12]:
schema = {
    "classes": [
        {
            "class": "Articles",  # Class name
            "description": "A class to store articles for retrieval",
            "vectorizer": "text2vec-cohere",  # Cohere vectorizer
            "properties": [
                {
                    "name": "title",
                    "dataType": ["string"],
                    "description": "The title of the article"
                },
                {
                    "name": "text",
                    "dataType": ["text"],
                    "description": "The unstructured content of the article"
                }
            ]
        }
    ]
}

# Add schema to Weaviate



In [13]:
client.schema.create(schema)

In [14]:
# Example unstructured data
articles = [
    {
        "title": "Ottawa: The Capital of Canada",
        "text": "Ottawa is the capital city of Canada, located in the province of Ontario."
    },
    {
        "title": "Canada Geography",
        "text": "Canada is the second-largest country in the world by land area."
    },
    {
        "title": "History of Canada",
        "text": "Canada has a rich history, including its indigenous peoples and colonial past."
    }
]

# Insert data into Weaviate
for article in articles:
    client.data_object.create(
        data_object=article,
        class_name="Article"
    )


In [15]:
query = "What is the capital of Canada?"

response = client.query.get("Article", ["title", "text"]) \
    .with_near_text({"concepts": [query]}) \
    .with_limit(3) \
    .do()

# Display results
for i, result in enumerate(response['data']['Get']['Article']):
    print(f"Result {i+1}:")
    print(f"Title: {result['title']}")
    print(f"Text: {result['text']}")


Result 1:
Title: Ottawa: The Capital of Canada
Text: Ottawa is the capital city of Canada, located in the province of Ontario.
Result 2:
Title: Ottawa: The Capital of Canada
Text: Ottawa is the capital city of Canada, located in the province of Ontario.
Result 3:
Title: Canada Geography
Text: Canada is the second-largest country in the world by land area.


In [16]:
query = "What is the capital of Canada?"

response = client.query.get("Article", ["title", "text"]) \
    .with_hybrid(query=query, alpha=0.7) \
    .with_limit(3) \
    .do()

# Display results
for i, result in enumerate(response['data']['Get']['Article']):
    print(f"Result {i+1}:")
    print(f"Title: {result['title']}")
    print(f"Text: {result['text']}")


Result 1:
Title: Ottawa: The Capital of Canada
Text: Ottawa is the capital city of Canada, located in the province of Ontario.
Result 2:
Title: Ottawa: The Capital of Canada
Text: Ottawa is the capital city of Canada, located in the province of Ontario.
Result 3:
Title: History of Canada
Text: Canada has a rich history, including its indigenous peoples and colonial past.


In [17]:
def rerank_results(query, results, co):
    # Extract the text of each document for reranking
    texts = [res.get('text', '') for res in results]  # Assuming `results` contains a list of dictionaries with a 'text' field

    if not all(texts):
        raise ValueError("Some documents are empty or missing text content.")

    # Call Cohere's rerank API
    print(texts)
    reranked = co.rerank(query=query, documents=texts, top_n=len(texts))

    return reranked


In [18]:
# Retrieve dense results
response = client.query.get("Article", ["title", "text"]) \
    .with_near_text({"concepts": [query]}) \
    .with_limit(10) \
    .do()

# Rerank results
dense_results = response['data']['Get']['Article']
reranked_results = rerank_results(query, dense_results, co)




['Ottawa is the capital city of Canada, located in the province of Ontario.', 'Ottawa is the capital city of Canada, located in the province of Ontario.', 'Canada is the second-largest country in the world by land area.', 'Canada is the second-largest country in the world by land area.', 'Canada has a rich history, including its indigenous peoples and colonial past.', 'Canada has a rich history, including its indigenous peoples and colonial past.']


In [19]:
texts = [res.get('text', '') for res in dense_results]

In [20]:
# Display reranked results
print('Query:', query)
for i, result in enumerate(reranked_results.results):
    print(f"Rank {i+1}: {result.index} : {texts[i]} (Relevance Score: {result.relevance_score})")

Query: What is the capital of Canada?
Rank 1: 0 : Ottawa is the capital city of Canada, located in the province of Ontario. (Relevance Score: 0.9935369)
Rank 2: 1 : Ottawa is the capital city of Canada, located in the province of Ontario. (Relevance Score: 0.9935369)
Rank 3: 2 : Canada is the second-largest country in the world by land area. (Relevance Score: 0.07613248)
Rank 4: 3 : Canada is the second-largest country in the world by land area. (Relevance Score: 0.07613248)
Rank 5: 4 : Canada has a rich history, including its indigenous peoples and colonial past. (Relevance Score: 0.02865267)
Rank 6: 5 : Canada has a rich history, including its indigenous peoples and colonial past. (Relevance Score: 0.02865267)


In [23]:
query = "capital of Canada"

response = client.query.get("Articles", ["title", "text"]).with_search(query=query).with_limit(3).do()

# Display sparse retrieval results
for i, result in enumerate(response['data']['Get']['Articles']):
    print(f"Result {i+1}:")
    print(f"Title: {result['title']}")
    print(f"Text: {result['text']}")


AttributeError: 'GetBuilder' object has no attribute 'with_search'