In [1]:
import weaviate

# Connect to the local instance deployed with Docker Compose.
# connect_to_local() is called with no arguments, so the client library's
# default host/ports are used.
client = weaviate.connect_to_local()

# Last expression of the cell, so the notebook displays the readiness flag.
client.is_ready()

True

In [4]:
# New "Questions" collection (to hold the 1k objects) with a vectorizer and a generative model

from weaviate.classes.config import Configure, Property, DataType

# Single source of truth for the Ollama endpoint, so the vectorizer and the
# generative module can never point at different servers. Switch environments
# by changing this one line.
OLLAMA_ENDPOINT = "http://host.docker.internal:11434"  # If using Docker
# OLLAMA_ENDPOINT = "http://ollama:11434"              # If GitHub Codespaces

# Start from a clean slate so the cell can be re-run safely.
if client.collections.exists("Questions"):
    client.collections.delete("Questions")

# Create the collection - with Ollama as both the vectorizer and the generative provider
client.collections.create(
    name="Questions",

    vectorizer_config=[
        Configure.NamedVectors.text2vec_ollama(
            name="main",
            api_endpoint=OLLAMA_ENDPOINT,
            model="snowflake-arctic-embed",  # The model to use, e.g. "nomic-embed-text"
        ),
    ],

    generative_config=Configure.Generative.ollama(
        api_endpoint=OLLAMA_ENDPOINT,
        model="gemma2:2b",  # The model to use, e.g. "phi3", or "mistral", "command-r-plus", "gemma"
    ),

    # properties=[  # Define properties (Optional)
    #     Property(name="question", data_type=DataType.TEXT),
    #     Property(name="answer", data_type=DataType.TEXT),
    #     Property(name="category", data_type=DataType.TEXT, skip_vectorization=True),
    #     Property(name="round", data_type=DataType.TEXT, skip_vectorization=True),
    #     Property(name="points", data_type=DataType.NUMBER),
    #     Property(name="airDate", data_type=DataType.DATE),
    # ],
)

<weaviate.collections.collection.Collection at 0x116a88410>

In [5]:
import json

# Read explicitly as UTF-8 so decoding does not depend on the platform's
# default locale encoding.
with open("./jeopardy_1k.json", encoding="utf-8") as file:
    data_1k = json.load(file)

# Preview the first two records to check field names and value types.
print(json.dumps(data_1k[0:2], indent=2))

[
  {
    "AirDate": "2006-11-08T00:00:00+00:00",
    "Round": "Double Jeopardy!",
    "Points": 800,
    "Category": "AMERICAN HISTORY",
    "Question": "Abraham Lincoln died across the street from this theatre on April 15, 1865",
    "Answer": "Ford's Theatre (the Ford Theatre accepted)"
  },
  {
    "AirDate": "2005-11-18T00:00:00+00:00",
    "Round": "Jeopardy!",
    "Points": 200,
    "Category": "RHYME TIME",
    "Question": "Any pigment on the wall so faded you can barely see it",
    "Answer": "faint paint"
  }
]


In [6]:
from tqdm import tqdm

questions = client.collections.get("Questions")

# Import every record through the client's dynamic batcher; each dict in
# data_1k is passed as the object's properties.
with questions.batch.dynamic() as batch:
    for item in tqdm(data_1k):
        batch.add_object(properties=item)

# Objects that failed to import (e.g. because vectorization failed) are
# collected on the batch rather than raised, so surface them explicitly.
# Nothing is printed when every object was imported.
failed_objects = questions.batch.failed_objects
if failed_objects:
    print(f"Number of failed imports: {len(failed_objects)}")
    print(f"First failed object: {failed_objects[0]}")

100%|██████████| 1000/1000 [00:07<00:00, 142.51it/s]


In [7]:
# Retrieval-augmented generation: run a near-text search for the query, then
# issue one grouped prompt over the returned objects.
response = questions.generate.near_text(
    query="musical instruments",
    grouped_task="What do these instruments have in common?",
    grouped_properties=["question", "answer"],
    limit=3,
)

# `generated` carries the answer produced for the grouped task.
print(response.generated)

The common thread connecting these items is that they **relate to music or sound in some way**. 

Let's break it down:

* **"Cute flute":** This refers directly to a musical instrument. 
* **"Behind the Music":**  This phrase suggests an exploration of the behind-the-scenes aspects of music, often focused on scandals or hidden stories. 
* **"A cue":** A "cue" in acting is a signal or prompt that guides the actor's performance, directly linked to sound cues used during performances.


Therefore, while they seem different, all three are related in some way to music and/or sound.  Let me know if you have more examples! 

