# Challenge 2: Jeopardy

### Vector Search Using Weaviate

- Load the dataset, keeping at least the question, answer and round properties
- Check the number of objects stored in the database
- Search for the objects that are close to the concept "Spice food recipes" and show their questions and answers
- Find "Spice food recipes" related questions that were used in Double Jeopardy! rounds

In [21]:
import requests
import json
import weaviate
import os
from weaviate.embedded import EmbeddedOptions
from dotenv import load_dotenv


In [22]:
# Download the Jeopardy sample dataset (a JSON array of question records) and parse it
JEOPARDY_DATA_URL = 'https://raw.githubusercontent.com/weaviate-tutorials/intro-workshop/main/data/jeopardy_1k.json'

# A timeout keeps the cell from hanging forever if the host is unreachable
resp = requests.get(JEOPARDY_DATA_URL, timeout=30)
# Stop here on an HTTP error instead of trying to parse an error page as JSON
resp.raise_for_status()
data = resp.json()

In [23]:
# Preview up to the first 5 parsed records.
# Slicing (instead of indexing 0..4) avoids an IndexError if fewer than 5 records were loaded.
for record in data[:5]:
    print(json.dumps(record, indent=2))

# Type and length of the data
print(type(data))
print(len(data))

{
  "Air Date": "2006-11-08",
  "Round": "Double Jeopardy!",
  "Value": 800,
  "Category": "AMERICAN HISTORY",
  "Question": "Abraham Lincoln died across the street from this theatre on April 15, 1865",
  "Answer": "Ford's Theatre (the Ford Theatre accepted)"
}
{
  "Air Date": "2005-11-18",
  "Round": "Jeopardy!",
  "Value": 200,
  "Category": "RHYME TIME",
  "Question": "Any pigment on the wall so faded you can barely see it",
  "Answer": "faint paint"
}
{
  "Air Date": "1987-06-23",
  "Round": "Double Jeopardy!",
  "Value": 600,
  "Category": "AMERICAN HISTORY",
  "Question": "After the original 13, this was the 1st state admitted to the union",
  "Answer": "Vermont"
}
{
  "Air Date": "2011-01-13",
  "Round": "Jeopardy!",
  "Value": 400,
  "Category": "TRANSPORTATION",
  "Question": "In 1922 Warren Harding said that this \"gauges the speed of our present-day life. It long ago ran down simple living\"",
  "Answer": "the automobile"
}
{
  "Air Date": "2001-12-03",
  "Round": "Double Je

In [24]:
# Connect to the Weaviate instance, authenticating with API keys taken from the environment

# Read a local .env file (if present) into the process environment so os.getenv can see the keys
load_dotenv()

cohere_api_key = os.getenv("COHERE_APIKEY")
weaviate_api_key = os.getenv("WEAVIATE_API_KEY")

# Fail early with a readable message rather than passing None on to the client
missing_keys = [
    env_name
    for env_name, env_value in (
        ("COHERE_APIKEY", cohere_api_key),
        ("WEAVIATE_API_KEY", weaviate_api_key),
    )
    if not env_value
]
if missing_keys:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_keys)
    )

auth_config = weaviate.AuthApiKey(api_key=weaviate_api_key)

client = weaviate.Client(
    url="https://e2pxfwhqioinxijlmnqxw.c0.europe-west3.gcp.weaviate.cloud",
    auth_client_secret=auth_config,
    additional_headers={
        # Forwarded with each request; the schema below uses the text2vec-cohere vectorizer
        "X-Cohere-Api-Key": cohere_api_key
    }
)

In [25]:
# Ask the Weaviate instance whether it is ready and report a positive answer
weaviate_ready = client.is_ready()
if weaviate_ready:
    print("Weaviate is ready")

Weaviate is ready


In [26]:
# Remove a previously created "Question" class, if any, so it can be created again from scratch
question_class_exists = client.schema.exists("Question")
if question_class_exists:
    client.schema.delete_class("Question")
    print("Class Deleted Successfully")

Class Deleted Successfully


In [27]:
# Define the class that will be used to add the data.
# It has three text properties: question, answer and round.

# Each property differs only in its name and description
question_properties = [
    {
        "name": property_name,
        "dataType": ["text"],
        "description": property_description
    }
    for property_name, property_description in (
        ("question", "The question"),
        ("answer", "The answer"),
        ("round", "The round"),
    )
]

question_class_schema = {
    "class": "Question",
    "description": "Jeopardy questions",
    "properties": question_properties,
    # Vectorizer module name and distance metric for the vector index
    "vectorizer": "text2vec-cohere",
    "vectorIndexConfig": {
        "distance": "cosine"
    }
}

client.schema.create_class(question_class_schema)

In [28]:
# Add every downloaded record to the "Question" class through the client's batch context
with client.batch.configure() as batch:
    for record in data:
        # Map the dataset's capitalised keys onto the lower-case property names of the class
        question_object = {
            "answer": record["Answer"],
            "question": record["Question"],
            "round": record["Round"],
        }
        batch.add_data_object(data_object=question_object, class_name="Question")


In [35]:
# Count the objects stored in the "Question" class using an aggregate query
count_query = client.query.aggregate("Question").with_meta_count()
count_response = count_query.do()
print(count_response)

{'data': {'Aggregate': {'Question': [{'meta': {'count': 1000}}]}}}


In [36]:
# Semantic search: fetch the 5 "Question" objects nearest to the concept
# "spice food recipes", returning question, answer and the vector distance
search_concept = {"concepts":"spice food recipes"}

spice_query = (
    client.query
    .get("Question", ["question", "answer"])
    .with_near_text(search_concept)
    .with_additional(["distance"])
    .with_limit(5)
)
result = spice_query.do()

print(json.dumps(result, indent=2))

{
  "data": {
    "Get": {
      "Question": [
        {
          "_additional": {
            "distance": 0.50665164
          },
          "answer": "tripe",
          "question": "Popular in Pennsylvania, pepper pot is a peppery soup made from this stomach lining"
        },
        {
          "_additional": {
            "distance": 0.55406934
          },
          "answer": "licorice",
          "question": "Herbs anise & fennel resemble the flavor of this common black candy"
        },
        {
          "_additional": {
            "distance": 0.56054974
          },
          "answer": "Hollandaise sauce",
          "question": "Often served over asparagus, this creamy sauce was 1st made in France, not in the Netherlands"
        },
        {
          "_additional": {
            "distance": 0.5611992
          },
          "answer": "Chiles Rellenos",
          "question": "The name of this Mexican dish made with chiles & cheese translates to \"stuffed peppers\""
        

In [38]:
# Same semantic search, limited to 3 results and restricted to objects
# whose round property equals "Double Jeopardy!"
search_concept = {"concepts":"spice food recipes"}
double_jeopardy_filter = {
    "path": ['round'],
    "operator" : "Equal",
    "valueText": "Double Jeopardy!"
}

double_jeopardy_query = (
    client.query
    .get("Question", ["question", "answer", "round"])
    .with_near_text(search_concept)
    .with_additional(["distance"])
    .with_limit(3)
    .with_where(double_jeopardy_filter)
)
result = double_jeopardy_query.do()

print(json.dumps(result, indent=2))

{
  "data": {
    "Get": {
      "Question": [
        {
          "_additional": {
            "distance": 0.59583455
          },
          "answer": "\"Jambalaya\"",
          "question": "This Creole concoction of meat & seafood is so good Hank Williams wrote a song about it in 1952",
          "round": "Double Jeopardy!"
        },
        {
          "_additional": {
            "distance": 0.6079453
          },
          "answer": "Herring",
          "question": "The Bismarck type of this fish is made of fillets cured in vinegar, salt & onions",
          "round": "Double Jeopardy!"
        },
        {
          "_additional": {
            "distance": 0.620338
          },
          "answer": "Snail",
          "question": "The Phoenicians used a liquid from several species of this gastropod to make Tyrian purple dye",
          "round": "Double Jeopardy!"
        }
      ]
    }
  }
}
