In [1]:
import requests
import json

# Download the data
# The timeout stops the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() surfaces HTTP errors (404, 5xx) right here instead of
# as a confusing JSON decode failure on an error page.
resp = requests.get(
    'https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json',
    timeout=30,
)
resp.raise_for_status()
data = json.loads(resp.text)  # Load data

# Parse the JSON and preview it
print(type(data), len(data))

def json_print(data, indent=2):
    """Pretty-print ``data`` to stdout as indented JSON.

    Args:
        data: Any JSON-serialisable object (dict, list, str, number, ...).
        indent: Number of spaces per nesting level; defaults to 2.

    Values that ``json`` cannot serialise natively (for example ``uuid.UUID``
    or ``datetime`` instances) are rendered through ``str()`` instead of
    raising ``TypeError``.
    """
    print(json.dumps(data, indent=indent, default=str))

# Show the first downloaded record so the field names are visible.
first_record = data[0]
json_print(first_record)

<class 'list'> 10
{
  "Category": "SCIENCE",
  "Question": "This organ removes excess glucose from the blood & stores it as glycogen",
  "Answer": "Liver"
}


In [4]:
import weaviate

# Open a connection to the local Weaviate instance and print its readiness flag.
client = weaviate.connect_to_local()

is_ready = client.is_ready()
print(is_ready)

True


In [5]:
# Fetch the server metadata and display it as the cell result.
server_meta = client.get_meta()
server_meta

{'grpcMaxMessageSize': 104858000,
 'hostname': 'http://[::]:8080',
 'modules': {'generative-anthropic': {'documentationHref': 'https://docs.anthropic.com/en/api/getting-started',
   'name': 'Generative Search - Anthropic'},
  'generative-anyscale': {'documentationHref': 'https://docs.anyscale.com/endpoints/overview',
   'name': 'Generative Search - Anyscale'},
  'generative-aws': {'documentationHref': 'https://docs.aws.amazon.com/bedrock/latest/APIReference/welcome.html',
   'name': 'Generative Search - AWS'},
  'generative-cohere': {'documentationHref': 'https://docs.cohere.com/reference/chat',
   'name': 'Generative Search - Cohere'},
  'generative-databricks': {'documentationHref': 'https://docs.databricks.com/en/machine-learning/foundation-models/api-reference.html#completion-task',
   'name': 'Generative Search - Databricks'},
  'generative-friendliai': {'documentationHref': 'https://docs.friendli.ai/openapi/create-chat-completions',
   'name': 'Generative Search - FriendliAI'},
 

In [None]:
from weaviate.classes.config import Configure
  
# Address at which the Weaviate container reaches the Ollama server on the host.
# If using Docker, use this to contact your local Ollama instance. Ollama must be
# running and reachable from the container at this address, otherwise every
# insert fails with "connection refused" when Weaviate tries to embed the object.
OLLAMA_ENDPOINT = "http://host.docker.internal:11434"

# Start from a clean slate so the cell can be re-run safely.
if client.collections.exists("DemoCollection"):
    client.collections.delete("DemoCollection")

client.collections.create(
    "DemoCollection",
    vectorizer_config=[
        Configure.NamedVectors.text2vec_ollama(
            # The vector name is kept as-is so any code that targets
            # "title_vector" keeps working.
            name="title_vector",
            # Vectorize properties that the imported objects actually carry
            # ("answer", "question", "category"); the previous value "title"
            # is not a property of the imported data.
            source_properties=["question", "answer"],
            api_endpoint=OLLAMA_ENDPOINT,
            model="nomic-embed-text",  # The model to use, e.g. "nomic-embed-text"
        )
    ],
    generative_config=Configure.Generative.ollama(
        api_endpoint=OLLAMA_ENDPOINT,
        model="llama3.1"
    )
)

<weaviate.collections.collection.sync.Collection at 0x1c0b633fb90>

In [92]:
# Grab a handle to the "DemoCollection" collection for the cells that follow.
demo_collection_name = "DemoCollection"
collection = client.collections.get(demo_collection_name)

In [93]:
from weaviate.util import generate_uuid5 

# Import every downloaded record through a dynamically sized batch.
with collection.batch.dynamic() as batch:
    for position, record in enumerate(data, start=1):
        # Map the capitalised source keys onto lower-case property names.
        properties = dict(
            answer=record["Answer"],
            question=record["Question"],
            category=record["Category"],
        )

        # The UUID is derived from the properties themselves.
        object_uuid = generate_uuid5(properties)

        # Progress line, the payload, then a blank separator line.
        print(
            f"importing question: {position} -> UUID {object_uuid}",
            properties,
            "",
            sep="\n",
        )

        batch.add_object(properties=properties, uuid=object_uuid)

        # Keep going while the error count is tolerable; bail out otherwise.
        if batch.number_errors <= 10:
            continue
        print("Batch import stopped due to excessive errors.")
        break

importing question: 1 -> UUID 2e3d7a13-f97d-56a7-8237-e3b2ffdd8ce3
{'answer': 'Liver', 'question': 'This organ removes excess glucose from the blood & stores it as glycogen', 'category': 'SCIENCE'}

importing question: 2 -> UUID 75e0f50c-bddd-5d88-ae87-bb0a3133947a
{'answer': 'Elephant', 'question': "It's the only living mammal in the order Proboseidea", 'category': 'ANIMALS'}

importing question: 3 -> UUID 01ae010e-f033-5ba1-9541-7efa117c8aef
{'answer': 'the nose or snout', 'question': 'The gavial looks very much like a crocodile except for this bodily feature', 'category': 'ANIMALS'}

importing question: 4 -> UUID 2ba855d5-89ee-5eb0-995c-362a2b6b1090
{'answer': 'Antelope', 'question': 'Weighing around a ton, the eland is the largest species of this animal in Africa', 'category': 'ANIMALS'}

importing question: 5 -> UUID 5cf0152e-3c9d-5279-8598-7ab034e42f58
{'answer': 'the diamondback rattler', 'question': 'Heaviest of all poisonous snakes is this North American rattlesnake', 'categor

In [94]:
# Report how many objects the batch import failed on, and show the first one.
failed_objects = collection.batch.failed_objects
if failed_objects:
    failure_count = len(failed_objects)
    first_failure = failed_objects[0]
    print(f"Number of failed imports: {failure_count}")
    print(f"First failed object: {first_failure}")

Number of failed imports: 10
First failed object: ErrorObject(message='WeaviateInsertManyAllFailedError(\'Every object failed during insertion. Here is the set of all errors: send POST request: Post "http://host.docker.internal:11434/api/embed": dial tcp 192.168.65.254:11434: connect: connection refused\')', object_=BatchObject(collection='DemoCollection', properties={'answer': 'Liver', 'question': 'This organ removes excess glucose from the blood & stores it as glycogen', 'category': 'SCIENCE'}, references=None, uuid='2e3d7a13-f97d-56a7-8237-e3b2ffdd8ce3', vector=None, tenant=None, index=0, retry_count=0), original_uuid=None)


In [None]:
# Walk over every object in the collection, printing its UUID and properties.
for stored_object in collection.iterator():
    print(stored_object.uuid, stored_object.properties)

In [44]:
# Fetch at most two objects, vectors included, and display the result.
collection.query.fetch_objects(limit=2, include_vector=True)

QueryReturn(objects=[])

In [45]:
# Aggregate over the whole collection and print the total object count.
response = collection.aggregate.over_all(total_count=True)
object_count = response.total_count

print(object_count)

0
