## Load the data

In [1]:
import requests
import json
import weaviate
import os
from weaviate.embedded import EmbeddedOptions
from dotenv import load_dotenv



In [2]:
# Download the sample question set (a JSON document) from the Weaviate quickstart repo

url = "https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json"
# A timeout keeps the cell from hanging indefinitely if the host does not answer
response = requests.get(url, timeout=30)
# Stop here on a 4xx/5xx reply instead of trying to parse an error page as JSON
response.raise_for_status()
data = json.loads(response.text)

# Preview the data type and length
print(type(data))
print(len(data))

# Preview the first entry
print(data[0])


<class 'list'>
10
{'Category': 'SCIENCE', 'Question': 'This organ removes excess glucose from the blood & stores it as glycogen', 'Answer': 'Liver'}


In [3]:
# Pretty-print the full dataset using the json_data helper defined below

import json
def json_data(data, indent=2):
    """Pretty-print ``data`` as JSON on standard output.

    Args:
        data: Any object that ``json.dumps`` can serialize.
        indent: Indentation width handed to ``json.dumps`` (default 2).

    Returns:
        None. The formatted text is printed, not returned.
    """
    rendered = json.dumps(data, indent=indent)
    print(rendered)


# Print every record held in `data`, using the helper's default indentation
json_data(data)

[
  {
    "Category": "SCIENCE",
    "Question": "This organ removes excess glucose from the blood & stores it as glycogen",
    "Answer": "Liver"
  },
  {
    "Category": "ANIMALS",
    "Question": "It's the only living mammal in the order Proboseidea",
    "Answer": "Elephant"
  },
  {
    "Category": "ANIMALS",
    "Question": "The gavial looks very much like a crocodile except for this bodily feature",
    "Answer": "the nose or snout"
  },
  {
    "Category": "ANIMALS",
    "Question": "Weighing around a ton, the eland is the largest species of this animal in Africa",
    "Answer": "Antelope"
  },
  {
    "Category": "ANIMALS",
    "Question": "Heaviest of all poisonous snakes is this North American rattlesnake",
    "Answer": "the diamondback rattler"
  },
  {
    "Category": "SCIENCE",
    "Question": "2000 news: the Gunnison sage grouse isn't just another northern sage grouse, but a new one of this classification",
    "Answer": "species"
  },
  {
    "Category": "SCIENCE",
   

## Initialize Weaviate

In [4]:
# Load variables from a .env file (when one is found) into the process environment
load_dotenv()

openai_api_key = os.getenv("OPENAI_API_KEY")

# NOTE(review): names containing "-" cannot be set with `export` in a POSIX shell;
# confirm the .env file really uses these exact spellings.
ai_studio_api_key = os.getenv("AI-STUDIO-API-KEY")

# NOTE(review): the variable name is misspelled ("waeivate"); it is kept unchanged
# in case other cells refer to it.
waeivate_api_key = os.getenv("WEAVIATE_API_KEY")

huggingfacehub_api_token = os.getenv("HUGGINGFACE-API-KEY")

cohere_api_key = os.getenv("COHERE_APIKEY")

# Only these two keys are actually passed to the client below. Fail fast with a
# clear message rather than sending `None` credentials to the cluster.
missing_keys = [
    env_name
    for env_name, env_value in (
        ("WEAVIATE_API_KEY", waeivate_api_key),
        ("COHERE_APIKEY", cohere_api_key),
    )
    if not env_value
]
if missing_keys:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_keys)
    )

auth_config = weaviate.AuthApiKey(api_key=waeivate_api_key)

# NOTE(review): `weaviate.Client` is the v3 API; the library prints a notice
# pointing to the v4 `weaviate.WeaviateClient` when this runs.
client = weaviate.Client(
    url="https://e2pxfwhqioinxijlmnqxw.c0.europe-west3.gcp.weaviate.cloud",
    auth_client_secret=auth_config,
    additional_headers={
        # Forwarded so the cluster can call Cohere on our behalf
        "X-Cohere-Api-Key": cohere_api_key
        #"X-Google-Studio-Api-Key": ai_studio_api_key
    }
)

            your code to use Python client v4 `weaviate.WeaviateClient` connections and methods.

            For Python Client v4 usage, see: https://weaviate.io/developers/weaviate/client-libraries/python
            For code migration, see: https://weaviate.io/developers/weaviate/client-libraries/python/v3_v4_migration
            


In [5]:
# Fetch the server metadata and pretty-print it as a quick check that the instance responds
meta = client.get_meta()
json_data(meta)

{
  "hostname": "http://[::]:8080",
  "modules": {
    "backup-gcs": {
      "bucketName": "weaviate-wcs-prod-cust-europe-west3-workloads-backups",
      "rootName": "7b6a577d-61ff-4083-a29d-78a394c9d05f"
    },
    "generative-anyscale": {
      "documentationHref": "https://docs.anyscale.com/endpoints/overview",
      "name": "Generative Search - Anyscale"
    },
    "generative-aws": {
      "documentationHref": "https://docs.aws.amazon.com/bedrock/latest/APIReference/welcome.html",
      "name": "Generative Search - AWS"
    },
    "generative-cohere": {
      "documentationHref": "https://docs.cohere.com/reference/chat",
      "name": "Generative Search - Cohere"
    },
    "generative-mistral": {
      "documentationHref": "https://docs.mistral.ai/api/",
      "name": "Generative Search - Mistral"
    },
    "generative-octoai": {
      "documentationHref": "https://octo.ai/docs/text-gen-solution/getting-started",
      "name": "Generative Search - OctoAI"
    },
    "generative-

In [6]:
# Drop the "Question" class when the schema already contains it
schema = client.schema
if schema.exists("Question"):
    schema.delete_class("Question")

In [7]:
# Class definition for "Question"; it names text2vec-cohere as the vectorizer
class_obj = {}
class_obj["class"] = "Question"
class_obj["vectorizer"] = "text2vec-cohere"

In [8]:
# Register the class described by `class_obj` with the Weaviate schema
schema = client.schema
schema.create_class(class_obj)

In [9]:
# Send every record through the client's batch interface, used here as a context manager
with client.batch.configure() as batch:
    for i, question in enumerate(data):
        print(f"Adding question {i} to the batch")
        # Lower-case the source keys ("Answer" -> "answer", ...) to get the property names
        properties = {
            source_key.lower(): question[source_key]
            for source_key in ("Answer", "Question", "Category")
        }
        # Queue the object for the "Question" class
        batch.add_data_object(data_object=properties, class_name="Question")

Adding question 0 to the batch
Adding question 1 to the batch
Adding question 2 to the batch
Adding question 3 to the batch
Adding question 4 to the batch
Adding question 5 to the batch
Adding question 6 to the batch
Adding question 7 to the batch
Adding question 8 to the batch
Adding question 9 to the batch


In [10]:
# Ask for the meta count of the "Question" class to see how many objects were loaded
count_response = client.query.aggregate("Question").with_meta_count().do()
json_data(count_response)

{
  "data": {
    "Aggregate": {
      "Question": [
        {
          "meta": {
            "count": 10
          }
        }
      ]
    }
  }
}


In [11]:
# Show the question and answer text of up to 3 stored objects
sample_query = client.query.get("Question", ['question', 'answer']).with_limit(3)
json_data(sample_query.do())

{
  "data": {
    "Get": {
      "Question": [
        {
          "answer": "the diamondback rattler",
          "question": "Heaviest of all poisonous snakes is this North American rattlesnake"
        },
        {
          "answer": "DNA",
          "question": "In 1953 Watson & Crick built a model of the molecular structure of this, the gene-carrying substance"
        },
        {
          "answer": "Antelope",
          "question": "Weighing around a ton, the eland is the largest species of this animal in Africa"
        }
      ]
    }
  }
}


### Extract the Vector that represents each question

In [12]:
# Fetch one object together with its stored vector (requested via `_additional`)
result = (
    client.query.get("Question", ['category', 'question', 'answer'])
    .with_additional('vector')
    .with_limit(1)
    .do()
)

In [13]:
# Category of the first object in the query result
first_hit = result['data']['Get']['Question'][0]
first_hit['category']

'ANIMALS'

In [14]:
# Question text of the first object in the query result
first_hit = result['data']['Get']['Question'][0]
first_hit['question']

'Heaviest of all poisonous snakes is this North American rattlesnake'

In [15]:
# Answer text of the first object in the query result
first_hit = result['data']['Get']['Question'][0]
first_hit['answer']

'the diamondback rattler'

In [16]:
# Vector returned under `_additional` for the first object in the query result
additional = result['data']['Get']['Question'][0]['_additional']
additional['vector']

[0.00548172,
 0.01638794,
 0.02444458,
 0.010025024,
 0.01939392,
 0.043273926,
 0.0064964294,
 -0.037078857,
 -0.013038635,
 0.055603027,
 -0.025436401,
 -0.049713135,
 0.041046143,
 -0.01524353,
 0.031341553,
 0.0079574585,
 0.04248047,
 -0.009353638,
 0.020889282,
 -0.032714844,
 -0.019378662,
 0.022354126,
 0.0635376,
 -0.026763916,
 -0.009941101,
 0.0104599,
 -0.016983032,
 0.019958496,
 0.01701355,
 -0.06921387,
 0.00037384033,
 0.009063721,
 0.0063705444,
 0.013969421,
 -0.007419586,
 0.033599854,
 -0.012168884,
 -0.035858154,
 0.009353638,
 0.04055786,
 0.035949707,
 0.033111572,
 0.0046081543,
 -0.0005545616,
 -0.043762207,
 0.015419006,
 0.017959595,
 0.003042221,
 0.05340576,
 0.02760315,
 -0.03250122,
 0.046661377,
 -0.006248474,
 -0.024108887,
 -0.007095337,
 -0.0093688965,
 -0.016067505,
 0.0015668869,
 0.026779175,
 -0.014694214,
 0.025802612,
 0.019714355,
 -0.0032100677,
 0.016464233,
 0.0061798096,
 -0.019058228,
 -0.009101868,
 0.04171753,
 -0.00060272217,
 0.0055465

In [17]:
# How many numbers are in the Vector
def count_numbers(vector):
    """Return how many items of ``vector`` are ``int`` or ``float`` instances.

    Items of any other type are skipped. ``bool`` values are counted as well,
    because ``bool`` is a subclass of ``int``.
    """
    return sum(1 for item in vector if isinstance(item, (int, float)))

# Count the numeric entries of the vector returned for the first object
question_vector = result['data']['Get']['Question'][0]['_additional']['vector']
count_numbers(question_vector)

1024

In [18]:
# Keep the vector under the name `vector` and display its length
vector = result['data']['Get']['Question'][0]['_additional']['vector']
len(vector)

1024

### Search for a relevant answer using vector search

In [19]:
# Near-text search on the concept "science", limited to 2 results
return_fields = ['question', 'answer', 'category']
near_science = {"concepts": ["science"]}
result = (
    client.query
    .get("Question", return_fields)
    .with_near_text(near_science)
    .with_limit(2)
    .do()
)

# Display the results
json_data(result, indent=4)

{
    "data": {
        "Get": {
            "Question": [
                {
                    "answer": "the atmosphere",
                    "category": "SCIENCE",
                    "question": "Changes in the tropospheric layer of this are what gives us weather"
                },
                {
                    "answer": "DNA",
                    "category": "SCIENCE",
                    "question": "In 1953 Watson & Crick built a model of the molecular structure of this, the gene-carrying substance"
                }
            ]
        }
    }
}


In [20]:
# Same "science" search, additionally requesting each result's `distance` under `_additional`
return_fields = ['question', 'answer', 'category']
near_science = {"concepts": ["science"]}
result = (
    client.query
    .get("Question", return_fields)
    .with_near_text(near_science)
    .with_additional('distance')
    .with_limit(2)
    .do()
)

# Display the results
json_data(result, indent=4)

{
    "data": {
        "Get": {
            "Question": [
                {
                    "_additional": {
                        "distance": 0.47632474
                    },
                    "answer": "the atmosphere",
                    "category": "SCIENCE",
                    "question": "Changes in the tropospheric layer of this are what gives us weather"
                },
                {
                    "_additional": {
                        "distance": 0.49815953
                    },
                    "answer": "DNA",
                    "category": "SCIENCE",
                    "question": "In 1953 Watson & Crick built a model of the molecular structure of this, the gene-carrying substance"
                }
            ]
        }
    }
}


In [21]:
# Near-text search on the concept "animals", asking for up to 10 objects with their distances
return_fields = ['question', 'answer', 'category']
near_animals = {"concepts": ["animals"]}
result = (
    client.query
    .get("Question", return_fields)
    .with_near_text(near_animals)
    .with_additional('distance')
    .with_limit(10)
    .do()
)

# Display the results
json_data(result, indent=4)

{
    "data": {
        "Get": {
            "Question": [
                {
                    "_additional": {
                        "distance": 0.46997482
                    },
                    "answer": "Elephant",
                    "category": "ANIMALS",
                    "question": "It's the only living mammal in the order Proboseidea"
                },
                {
                    "_additional": {
                        "distance": 0.47995377
                    },
                    "answer": "Antelope",
                    "category": "ANIMALS",
                    "question": "Weighing around a ton, the eland is the largest species of this animal in Africa"
                },
                {
                    "_additional": {
                        "distance": 0.5098027
                    },
                    "answer": "the nose or snout",
                    "category": "ANIMALS",
                    "question": "The gavial looks very much lik

In [22]:
# Distance threshold handed to the near-text search
max_distance = 0.60

# Same "animals" search, with `max_distance` passed as the near-text "distance" setting
return_fields = ['question', 'answer', 'category']
near_animals = {"concepts": ["animals"], "distance": max_distance}
result = (
    client.query
    .get("Question", return_fields)
    .with_near_text(near_animals)
    .with_additional('distance')
    .with_limit(10)
    .do()
)

json_data(result, indent=4)

{
    "data": {
        "Get": {
            "Question": [
                {
                    "_additional": {
                        "distance": 0.46997482
                    },
                    "answer": "Elephant",
                    "category": "ANIMALS",
                    "question": "It's the only living mammal in the order Proboseidea"
                },
                {
                    "_additional": {
                        "distance": 0.47995377
                    },
                    "answer": "Antelope",
                    "category": "ANIMALS",
                    "question": "Weighing around a ton, the eland is the largest species of this animal in Africa"
                },
                {
                    "_additional": {
                        "distance": 0.5098027
                    },
                    "answer": "the nose or snout",
                    "category": "ANIMALS",
                    "question": "The gavial looks very much lik