In [2]:
# Install python client reources

!pip3 install --pre -I "weaviate-client==4.*"

Collecting weaviate-client==4.*
  Using cached weaviate_client-4.2b0-py3-none-any.whl.metadata (3.3 kB)
Collecting requests<3.0.0,>=2.30.0 (from weaviate-client==4.*)
  Using cached requests-2.31.0-py3-none-any.whl.metadata (4.6 kB)
Collecting validators<1.0.0,>=0.21.2 (from weaviate-client==4.*)
  Using cached validators-0.22.0-py3-none-any.whl.metadata (4.7 kB)
Collecting authlib<2.0.0,>=1.2.1 (from weaviate-client==4.*)
  Using cached Authlib-1.2.1-py2.py3-none-any.whl.metadata (3.8 kB)
Collecting pydantic<3.0.0,>=2.1.1 (from weaviate-client==4.*)
  Downloading pydantic-2.5.0-py3-none-any.whl.metadata (174 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m174.6/174.6 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hCollecting grpcio<2.0.0,>=1.57.0 (from weaviate-client==4.*)
  Using cached grpcio-1.59.2-cp310-cp310-macosx_12_0_universal2.whl.metadata (4.0 kB)
Collecting grpcio-tools<2.0.0,>=1.57.0 (from weaviate-client==4.*)
  Using cached grpcio_tools-1

In [3]:
# Gather resources

!pip install weaviate-demo-datasets



In [4]:
# Connect to instance

import weaviate, os, json
import weaviate.classes as wvc

client = weaviate.connect_to_local(
    headers={
        "X-OpenAI-Api-Key": os.environ['OPENAI_API_KEY']  # Replace with your inference API key
    }
)

In [4]:
# Define collections

if (client.collections.exists("JeopardyCategory")):
    client.collections.delete("JeopardyCategory")

if (client.collections.exists("JeopardyQuestion")):
    client.collections.delete("JeopardyQuestion")

client.collections.create(
    name="JeopardyCategory",
    description="A Jeopardy! category",
    vectorizer_config=wvc.Configure.Vectorizer.text2vec_openai(),
    generative_config=wvc.Configure.Generative.openai(),
    properties=[
        wvc.Property(name="title", data_type=wvc.DataType.TEXT),
    ]
)

<weaviate.collections.collection.Collection at 0x106f83fa0>

In [5]:
# CrossRef Definition

client.collections.create(
    name="JeopardyQuestion",
    description="A Jeopardy! question",
    vectorizer_config=wvc.Configure.Vectorizer.text2vec_openai(),
    generative_config=wvc.Configure.Generative.openai(),
    properties=[
        wvc.Property(name="question", data_type=wvc.DataType.TEXT),
        wvc.Property(name="answer", data_type=wvc.DataType.TEXT),
        # highlight-start
        wvc.ReferenceProperty(
            name="hasCategory",
            target_collection="JeopardyCategory"
        )
        # highlight-end
    ]
)

client.collections.list_all()

{'JeopardyCategory': _CollectionConfigSimple(name='JeopardyCategory', description='A Jeopardy! category', properties=[_Property(data_type=<DataType.TEXT: 'text'>, description=None, index_filterable=True, index_searchable=True, name='title', tokenization=<Tokenization.WORD: 'word'>)], vectorizer=<Vectorizer.TEXT2VEC_OPENAI: 'text2vec-openai'>),
 'JeopardyQuestion': _CollectionConfigSimple(name='JeopardyQuestion', description='A Jeopardy! question', properties=[_Property(data_type=<DataType.TEXT: 'text'>, description=None, index_filterable=True, index_searchable=True, name='question', tokenization=<Tokenization.WORD: 'word'>), _Property(data_type=<DataType.TEXT: 'text'>, description=None, index_filterable=True, index_searchable=True, name='answer', tokenization=<Tokenization.WORD: 'word'>), _Property(data_type=_ReferenceDataType(target_collection='JeopardyCategory'), description=None, index_filterable=True, index_searchable=False, name='hasCategory', tokenization=None)], vectorizer=<Vect

In [6]:
# Upload data. Uses old client (Not sure why)

import weaviate_datasets
dataset = weaviate_datasets.JeopardyQuestions1k()  # instantiate dataset

old_client = weaviate.Client("http://localhost:8080")
dataset = weaviate_datasets.JeopardyQuestions10k()  # instantiate dataset
dataset.upload_objects(old_client, 100)

            Please instead use the `client.batch.configure()` method to configure your batch and `client.batch` to enter the context manager.
            See https://weaviate.io/developers/weaviate/client-libraries/python for details.
10000it [00:27, 363.08it/s]


True

In [5]:
# Test connection and upload

jeopardy = client.collections.get("JeopardyQuestion")
response = jeopardy.query.fetch_objects(
    limit=2
)

for o in response.objects:
    print(json.dumps(o.properties, indent=2))

{
  "points": 100.0,
  "answer": "Jonah",
  "air_date": "2001-01-10T00:00:00Z",
  "question": "This prophet passed the time he spent inside a fish offering up prayers",
  "round": "Jeopardy!"
}
{
  "points": 400.0,
  "air_date": "2004-10-18T00:00:00Z",
  "answer": "lay eggs",
  "round": "Jeopardy!",
  "question": "Pythons are oviparous, meaning they do this"
}


In [8]:
# Pretty print output

response = jeopardy.query.fetch_objects( limit=1 )

for o in response.objects:
    print(json.dumps(o.properties, indent=2))
    
    

{
  "points": 100.0,
  "air_date": "2001-01-10T00:00:00Z",
  "answer": "Jonah",
  "question": "This prophet passed the time he spent inside a fish offering up prayers",
  "round": "Jeopardy!"
}


In [12]:
# BM25BasicPython

jeopardy = client.collections.get("JeopardyQuestion")
# highlight-start
response = jeopardy.query.bm25(
# highlight-end
    query="food",
    limit=3
)

for o in response.objects:
    print(json.dumps(o.properties, indent=2))

{
  "points": 1000.0,
  "air_date": "1986-02-04T00:00:00Z",
  "answer": "food stores (supermarkets)",
  "round": "Double Jeopardy!",
  "question": "This type of retail store sells more shampoo & makeup than any other"
}
{
  "points": 200.0,
  "air_date": "2000-10-04T00:00:00Z",
  "answer": "cake",
  "round": "Jeopardy!",
  "question": "Devil's food & angel food are types of this dessert"
}
{
  "points": 800.0,
  "air_date": "2008-05-08T00:00:00Z",
  "answer": "a closer grocer",
  "question": "A nearer food merchant",
  "round": "Jeopardy!"
}


In [26]:
# BM25 Query with score / explainScore
 
import weaviate.classes as wvc

jeopardy = client.collections.get("JeopardyQuestion")
response = jeopardy.query.bm25(
    query="food",
    limit=3
)

print(response.objects)

for o in response.objects:
    print(json.dumps(o.properties, indent=2))
    # highlight-start
    print(o.metadata.score)
    # highlight-end

[_Object(properties={'points': 1000.0, 'air_date': '1986-02-04T00:00:00Z', 'answer': 'food stores (supermarkets)', 'question': 'This type of retail store sells more shampoo & makeup than any other', 'round': 'Double Jeopardy!'}, metadata=_MetadataReturn(uuid=UUID('d6e58390-4f1e-524c-898c-e8aea413022e'), vector=None, creation_time_unix=1699889287053, last_update_time_unix=1699889288305, distance=None, certainty=None, score=3.045816421508789, explain_score=', BM25F_food_frequency:1, BM25F_food_propLength:3', is_consistent=False)), _Object(properties={'points': 200.0, 'air_date': '2000-10-04T00:00:00Z', 'answer': 'cake', 'question': "Devil's food & angel food are types of this dessert", 'round': 'Jeopardy!'}, metadata=_MetadataReturn(uuid=UUID('8345ec8a-507f-5dbe-b817-f64e92492f61'), vector=None, creation_time_unix=1699889287050, last_update_time_unix=1699889288301, distance=None, certainty=None, score=2.9159791469573975, explain_score=', BM25F_food_frequency:2, BM25F_food_propLength:9', 

In [22]:
# Limit

jeopardy = client.collections.get("JeopardyQuestion")
response = jeopardy.query.bm25(
    query="safety",
    # highlight-start
    limit=3
    # highlight-end
)

for o in response.objects:
    print(json.dumps(o.properties, indent=2))

{
  "points": 600.0,
  "air_date": "2008-09-29T00:00:00Z",
  "answer": "OSHA (Occupational Safety and Health Administration)",
  "round": "Jeopardy!",
  "question": "The government admin. was created in 1971 to ensure occupational health & safety standards"
}
{
  "points": 800.0,
  "air_date": "1991-02-20T00:00:00Z",
  "answer": "France",
  "round": "Double Jeopardy!",
  "question": "Royale, Joseph, and Devil's Islands make up the Safety Islands owned by this country"
}
{
  "points": 300.0,
  "air_date": "1998-01-07T00:00:00Z",
  "answer": "Devil's Island",
  "question": "The Safety Islands off French Guiana consist of Royale, Saint-Joseph & this diabolical island",
  "round": "Jeopardy!"
}


In [24]:
# Autocut

jeopardy = client.collections.get("JeopardyQuestion")
response = jeopardy.query.bm25(
    query="safety",
    # highlight-start
    auto_limit=1
    # highlight-end
)

for o in response.objects:
    print(json.dumps(o.properties, indent=2))

{
  "points": 600.0,
  "answer": "OSHA (Occupational Safety and Health Administration)",
  "air_date": "2008-09-29T00:00:00Z",
  "question": "The government admin. was created in 1971 to ensure occupational health & safety standards",
  "round": "Jeopardy!"
}


In [28]:
# Query with properties

jeopardy = client.collections.get("JeopardyQuestion")
response = jeopardy.query.bm25(
    query="safety",
    # highlight-start
    query_properties=["question"],
    # highlight-end
    limit=3
)

for o in response.objects:
    # highlight-start
    print(json.dumps(o.properties, indent=2))
    print(o.metadata.score)
    # highlight-start

{
  "points": 600.0,
  "answer": "OSHA (Occupational Safety and Health Administration)",
  "air_date": "2008-09-29T00:00:00Z",
  "round": "Jeopardy!",
  "question": "The government admin. was created in 1971 to ensure occupational health & safety standards"
}
3.333716630935669
{
  "points": 800.0,
  "answer": "France",
  "air_date": "1991-02-20T00:00:00Z",
  "question": "Royale, Joseph, and Devil's Islands make up the Safety Islands owned by this country",
  "round": "Double Jeopardy!"
}
3.237332820892334
{
  "points": 300.0,
  "air_date": "1998-01-07T00:00:00Z",
  "answer": "Devil's Island",
  "question": "The Safety Islands off French Guiana consist of Royale, Saint-Joseph & this diabolical island",
  "round": "Jeopardy!"
}
3.237332820892334


In [29]:
# Weighted

jeopardy = client.collections.get("JeopardyQuestion")
response = jeopardy.query.bm25(
    query="food",
    # highlight-start
    query_properties=["question^2", "answer"],
    # highlight-end
    limit=3
)

for o in response.objects:
    # highlight-start
    print(json.dumps(o.properties, indent=2))
    print(o.metadata.score)
    # highlight-start

{
  "points": 200.0,
  "air_date": "2000-10-04T00:00:00Z",
  "answer": "cake",
  "round": "Jeopardy!",
  "question": "Devil's food & angel food are types of this dessert"
}
4.037484645843506
{
  "points": 800.0,
  "answer": "a closer grocer",
  "air_date": "2008-05-08T00:00:00Z",
  "question": "A nearer food merchant",
  "round": "Jeopardy!"
}
3.8985471725463867
{
  "points": 1000.0,
  "air_date": "1986-02-04T00:00:00Z",
  "answer": "food stores (supermarkets)",
  "round": "Double Jeopardy!",
  "question": "This type of retail store sells more shampoo & makeup than any other"
}
3.275304079055786


In [35]:
jeopardy = client.collections.get("JeopardyQuestion")
response = jeopardy.query.bm25(
    # highlight-start
    query="food wine", # search for food or wine
    # highlight-end
    query_properties=["question"],
    return_properties=["question"], # only return question property
    limit=5
)

for o in response.objects:
    print(o.properties["question"])


Wine, a ship, Croce's time
Devil's food & angel food are types of this dessert
Type of event in Cana at which Jesus turned water into wine
A nearer food merchant
Sparkling wine sold under the name Champagne must come from this region in Northeast France


In [36]:
import weaviate.classes as wvc

jeopardy = client.collections.get("JeopardyQuestion")
response = jeopardy.query.bm25(
    query="food",
    # highlight-start
    filters=wvc.Filter("round").equal("Double Jeopardy!"),
    # highlight-end
    return_properties=["answer", "question", "round"], # return these properties
    limit=3
)

for o in response.objects:
    print(json.dumps(o.properties, indent=2))

{
  "answer": "food stores (supermarkets)",
  "round": "Double Jeopardy!",
  "question": "This type of retail store sells more shampoo & makeup than any other"
}
{
  "answer": "honey",
  "round": "Double Jeopardy!",
  "question": "The primary source of this food is the Apis mellifera"
}
{
  "answer": "pseudopods",
  "round": "Double Jeopardy!",
  "question": "Amoebas use temporary extensions called these to move or to surround & engulf food"
}
