## Load the data

In [1]:
import requests
import json
import weaviate
import os
from weaviate.embedded import EmbeddedOptions
from dotenv import load_dotenv



In [2]:
# Download the sample Jeopardy questions used by the rest of the notebook.

url = "https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=30)
# Surface HTTP errors (404, 5xx, ...) here instead of as a confusing JSON
# decode error on an error page.
response.raise_for_status()
data = response.json()

# Preview the data type and length
print(type(data))
print(len(data))

# Preview the first entry
print(data[0])


<class 'list'>
10
{'Category': 'SCIENCE', 'Question': 'This organ removes excess glucose from the blood & stores it as glycogen', 'Answer': 'Liver'}


In [3]:
# Pretty-print the full dataset with the json_data helper

import json
def json_data(data, indent=2):
    """Print ``data`` to stdout as indented JSON.

    Args:
        data: Any object that ``json.dumps`` can serialize.
        indent: Indentation passed through to ``json.dumps`` (default 2).
    """
    rendered = json.dumps(data, indent=indent)
    print(rendered)


# Dump every downloaded entry; indent is left at json_data's default.
json_data(data)

[
  {
    "Category": "SCIENCE",
    "Question": "This organ removes excess glucose from the blood & stores it as glycogen",
    "Answer": "Liver"
  },
  {
    "Category": "ANIMALS",
    "Question": "It's the only living mammal in the order Proboseidea",
    "Answer": "Elephant"
  },
  {
    "Category": "ANIMALS",
    "Question": "The gavial looks very much like a crocodile except for this bodily feature",
    "Answer": "the nose or snout"
  },
  {
    "Category": "ANIMALS",
    "Question": "Weighing around a ton, the eland is the largest species of this animal in Africa",
    "Answer": "Antelope"
  },
  {
    "Category": "ANIMALS",
    "Question": "Heaviest of all poisonous snakes is this North American rattlesnake",
    "Answer": "the diamondback rattler"
  },
  {
    "Category": "SCIENCE",
    "Question": "2000 news: the Gunnison sage grouse isn't just another northern sage grouse, but a new one of this classification",
    "Answer": "species"
  },
  {
    "Category": "SCIENCE",
   

## Initialize Weaviate

In [11]:
# Load credentials from a local .env file into the process environment.
load_dotenv()

openai_api_key = os.getenv("OPENAI_API_KEY")

vertex_api_key = os.getenv("GOOGLE_APIKEY")

# NOTE: the variable name is misspelled ("waeivate"); it is kept unchanged in
# case other cells refer to it.
waeivate_api_key = os.getenv("WEAVIATE_API_KEY")

huggingfacehub_api_token = os.getenv("HUGGINGFACE-API-KEY")

# Fail fast with a readable message instead of handing ``None`` to the auth
# config and getting an opaque authentication error later.
if not waeivate_api_key:
    raise RuntimeError(
        "WEAVIATE_API_KEY is not set; add it to the environment or the .env file"
    )

auth_config = weaviate.AuthApiKey(api_key=waeivate_api_key)

# Forward only the inference-provider keys that are actually configured.
# The "Question" class uses the text2vec-huggingface vectorizer, so the
# Hugging Face token must be sent; previously only a literal placeholder
# string was sent as the Cohere key.
additional_headers = {}
if huggingfacehub_api_token:
    additional_headers["X-HuggingFace-Api-Key"] = huggingfacehub_api_token
cohere_api_key = os.getenv("COHERE_APIKEY")
if cohere_api_key:
    additional_headers["X-Cohere-Api-Key"] = cohere_api_key

client = weaviate.Client(
    url="https://e2pxfwhqioinxijlmnqxw.c0.europe-west3.gcp.weaviate.cloud",
    auth_client_secret=auth_config,
    additional_headers=additional_headers,
)

In [12]:
# Confirm the Weaviate instance responds by fetching and printing its metadata
server_meta = client.get_meta()
json_data(server_meta)

{
  "hostname": "http://[::]:8080",
  "modules": {
    "backup-gcs": {
      "bucketName": "weaviate-wcs-prod-cust-europe-west3-workloads-backups",
      "rootName": "7b6a577d-61ff-4083-a29d-78a394c9d05f"
    },
    "generative-anyscale": {
      "documentationHref": "https://docs.anyscale.com/endpoints/overview",
      "name": "Generative Search - Anyscale"
    },
    "generative-aws": {
      "documentationHref": "https://docs.aws.amazon.com/bedrock/latest/APIReference/welcome.html",
      "name": "Generative Search - AWS"
    },
    "generative-cohere": {
      "documentationHref": "https://docs.cohere.com/reference/chat",
      "name": "Generative Search - Cohere"
    },
    "generative-mistral": {
      "documentationHref": "https://docs.mistral.ai/api/",
      "name": "Generative Search - Mistral"
    },
    "generative-octoai": {
      "documentationHref": "https://octo.ai/docs/text-gen-solution/getting-started",
      "name": "Generative Search - OctoAI"
    },
    "generative-

In [13]:
# Remove the "Question" class first if the server already has it
question_class_exists = client.schema.exists("Question")
if question_class_exists:
    client.schema.delete_class("Question")

In [14]:
# Definition of the "Question" class and the vectorizer module it uses
class_obj = dict(
    {"class": "Question"},
    vectorizer="text2vec-huggingface",
)

In [15]:
# Create the class described by class_obj through the client's schema API
client.schema.create_class(class_obj)

In [16]:
# Add each downloaded entry to the batch as a "Question" object, mapping the
# capitalised source keys to lower-case property names.
with client.batch.configure() as batch:
    for i, question in enumerate(data):
        print(f"Adding question {i} to the batch")
        properties = {
            "answer": question["Answer"],
            "question": question["Question"],
            "category": question["Category"],
        }
        batch.add_data_object(data_object=properties, class_name="Question")

Adding question 0 to the batch
Adding question 1 to the batch
Adding question 2 to the batch
Adding question 3 to the batch
Adding question 4 to the batch
Adding question 5 to the batch
Adding question 6 to the batch
Adding question 7 to the batch
Adding question 8 to the batch
Adding question 9 to the batch


In [17]:
# Check how many objects are stored under the "Question" class
count_query = client.query.aggregate("Question").with_meta_count()
json_data(count_query.do())

{
  "data": {
    "Aggregate": {
      "Question": [
        {
          "meta": {
            "count": 10
          }
        }
      ]
    }
  }
}


In [19]:
# Fetch up to three objects and show only their question and answer properties
sample_query = client.query.get("Question", ['question', 'answer']).with_limit(3)
json_data(sample_query.do())

{
  "data": {
    "Get": {
      "Question": [
        {
          "answer": "species",
          "question": "2000 news: the Gunnison sage grouse isn't just another northern sage grouse, but a new one of this classification"
        },
        {
          "answer": "the atmosphere",
          "question": "Changes in the tropospheric layer of this are what gives us weather"
        },
        {
          "answer": "wire",
          "question": "A metal that is ductile can be pulled into this while cold & under pressure"
        }
      ]
    }
  }
}
