In [16]:
!pip install weaviate-client


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


## Test 1 - make sure you can connect

> `TODO` - update the host to a url of your instance

In [1]:
import weaviate
import os
from dotenv import load_dotenv
from weaviate.classes.init import Auth

load_dotenv()
WEAVIATE_KEY = os.getenv("WEAVIATE_KEY")

# Connect to the local instance deployed with Docker Compose
client = weaviate.connect_to_local(
  host="127.0.0.1", # the address to the learner's instance
  port=8080,        # the default REST port
  grpc_port=50051,  # the default GRPC port
  auth_credentials=Auth.api_key(WEAVIATE_KEY)
)

client.is_ready() # Expected True

True

In [2]:
# Display the available modules - ideally you should be able to see 'generative-openai' and 'text2vec-openai'
client.get_meta()

{'grpcMaxMessageSize': 104858000,
 'hostname': 'http://[::]:8080',
 'modules': {'generative-anthropic': {'documentationHref': 'https://docs.anthropic.com/en/api/getting-started',
   'name': 'Generative Search - Anthropic'},
  'generative-anyscale': {'documentationHref': 'https://docs.anyscale.com/endpoints/overview',
   'name': 'Generative Search - Anyscale'},
  'generative-aws': {'documentationHref': 'https://docs.aws.amazon.com/bedrock/latest/APIReference/welcome.html',
   'name': 'Generative Search - AWS'},
  'generative-cohere': {'documentationHref': 'https://docs.cohere.com/reference/chat',
   'name': 'Generative Search - Cohere'},
  'generative-databricks': {'documentationHref': 'https://docs.databricks.com/en/machine-learning/foundation-models/api-reference.html#completion-task',
   'name': 'Generative Search - Databricks'},
  'generative-friendliai': {'documentationHref': 'https://docs.friendli.ai/openapi/create-chat-completions',
   'name': 'Generative Search - FriendliAI'},
 

In [3]:
client.close()

## Test 2 - test your OpenAI configuration
> `TODO: 1` - update the `host` to a url of your instance

> `TODO: 2` - update the `base_url` (twice) to your OpenAI url where you host the models

In [None]:
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_URL = os.getenv("OPENAI_URL")
print(f"OpenAI API Key: {OPENAI_API_KEY}")
print(f"OpenAI URL: {OPENAI_URL}")

OpenAI API Key: sk-dummy-key-for-local-testing
OpenAI URL: http://host.docker.internal:11434


In [5]:
import weaviate
from weaviate.classes.init import Auth

# Connect to the local instance
client = weaviate.connect_to_local(
  host="127.0.0.1", # the address to the learner's instance
  port=8080,
  grpc_port=50051,
  auth_credentials=Auth.api_key(WEAVIATE_KEY),
  headers={
    "X-OpenAI-Api-Key": OPENAI_API_KEY
  }
)

print(client.is_ready())

True


In [None]:
from weaviate.classes.config import Configure

if client.collections.exists("TestCollection"):
    client.collections.delete("TestCollection")

client.collections.create(
    name="TestCollection",
    
    # Using local ollama embedding model
    vector_config=Configure.Vectors.text2vec_ollama(
        model="nomic-embed-text",
        api_endpoint=OPENAI_URL
    ),
    
    # Using local ollama generative model
    generative_config=Configure.Generative.ollama(
        model="qwen2.5:0.5b",
        api_endpoint=OPENAI_URL
    ),
)

<weaviate.collections.collection.sync.Collection at 0x128be0410>

### Load and vectorize data

In [7]:
import json

# prepare the sample data
with open("./jeopardy_tiny.json") as file:
    data_10 = json.load(file)

print(json.dumps(data_10[0:2], indent=2))

[
  {
    "Category": "SCIENCE",
    "Question": "This organ removes excess glucose from the blood & stores it as glycogen",
    "Answer": "Liver"
  },
  {
    "Category": "ANIMALS",
    "Question": "It's the only living mammal in the order Proboseidea",
    "Answer": "Elephant"
  }
]


In [8]:
test_collection = client.collections.get("TestCollection")
test_collection.data.insert_many(data_10)

BatchObjectReturn(_all_responses=[UUID('45ccc131-4eb1-4a09-a98c-275cc99d976e'), UUID('95e760d5-a9a4-467b-bc33-0322ca4b21c9'), UUID('381d9153-dbb3-4781-a289-b7099df227e6'), UUID('687d0b18-20a7-4b7c-bb1a-3c7b6a445d7f'), UUID('99bf927d-6417-43c1-aa51-6db8415f1695'), UUID('825add8c-a8d6-4004-a52c-1efee29177d8'), UUID('0ca9c15a-925c-4fac-8167-4547479fdb29'), UUID('c74e40ef-6a9c-44f9-b217-5abc3e533cc0'), UUID('68c126e8-25a0-4701-8a5b-e92084b94a80'), UUID('a4d6aae1-9271-4b82-9cbb-a0cbefaddf88')], elapsed_seconds=0.49982786178588867, errors={}, uuids={0: UUID('45ccc131-4eb1-4a09-a98c-275cc99d976e'), 1: UUID('95e760d5-a9a4-467b-bc33-0322ca4b21c9'), 2: UUID('381d9153-dbb3-4781-a289-b7099df227e6'), 3: UUID('687d0b18-20a7-4b7c-bb1a-3c7b6a445d7f'), 4: UUID('99bf927d-6417-43c1-aa51-6db8415f1695'), 5: UUID('825add8c-a8d6-4004-a52c-1efee29177d8'), 6: UUID('0ca9c15a-925c-4fac-8167-4547479fdb29'), 7: UUID('c74e40ef-6a9c-44f9-b217-5abc3e533cc0'), 8: UUID('68c126e8-25a0-4701-8a5b-e92084b94a80'), 9: UUID('

### Check the data

In [9]:
# check the number of objects - should be 10
len(test_collection)

10

In [10]:
# Show data preview - with vectors
test_collection = client.collections.get("TestCollection")
response = test_collection.query.fetch_objects(
    limit=4,
    include_vector=True
)

for item in response.objects:
  print(item.properties)
  print(item.vector, '\n')

{'answer': 'wire', 'question': 'A metal that is ductile can be pulled into this while cold & under pressure', 'category': 'SCIENCE'}
{'default': [0.058831728994846344, 0.04262785613536835, -0.19099970161914825, -0.05198316276073456, 0.047998640686273575, 0.020649714395403862, 0.01461829524487257, -0.0031276310328394175, -0.01884767971932888, 0.00045859842794016004, -0.02183753065764904, 0.03696895018219948, -0.015009954571723938, -0.07586101442575455, 0.013792484998703003, -0.012114690616726875, 0.019102739170193672, -0.06798559427261353, 0.05759872868657112, 0.015061055310070515, -0.007698911242187023, -0.03911091014742851, 0.012917724438011646, -0.03853746876120567, 0.01696914993226528, 0.03173854202032089, -0.01786758005619049, 0.027190756052732468, -0.04193983972072601, -0.006748698651790619, 0.00860683061182499, -0.05947267264127731, -0.006091756746172905, -0.0816388875246048, -0.09862814098596573, -0.05260299891233444, 0.03543363884091377, -0.013365295715630054, 0.026335801929235

### Query example – it uses the OpenAI key

In [11]:
response = test_collection.query.near_text(
  query="African animals",
  limit=2
)

for item in response.objects:
  print(item.properties)

{'answer': 'Antelope', 'question': 'Weighing around a ton, the eland is the largest species of this animal in Africa', 'category': 'ANIMALS'}
{'answer': 'Elephant', 'question': "It's the only living mammal in the order Proboseidea", 'category': 'ANIMALS'}


### RAG example

In [12]:
response = test_collection.generate.near_text(
    query="African animals",
    limit=2,
    single_prompt="Please translate {question} to german"
)

print("=== Source ===")
for item in response.objects:
    print(item.properties)
    print(item.generative.text)

=== Source ===
{'answer': 'Antelope', 'question': 'Weighing around a ton, the eland is the largest species of this animal in Africa', 'category': 'ANIMALS'}
Translation: "Picking up a hefty elephant, the eland is undoubtedly the largest creature within its native continent."
{'answer': 'Elephant', 'question': "It's the only living mammal in the order Proboseidea", 'category': 'ANIMALS'}
Here is my translation of the given English text:

It is the only living mammal within the Order Proboseidea to German

This statement describes a specific species of mammals that are unique and closely related. The Order Proboseidea includes various groups of mammals, including some plants and fungi, which are not considered mammals in the conventional sense.

In this context, it's important to note that "german" is likely referring to a term or language used in German-speaking countries, rather than being an English word specifically associated with Germany. The statement suggests that this particular

In [13]:
client.close()