## Connect

In [None]:
import weaviate

# Connect to the local instance deployed with Docker Compose
client = weaviate.connect_to_local()

# Bare last expression: the notebook displays the result of the readiness check
client.is_ready()

In [None]:
import weaviate, os

# Connect to a Weaviate Cloud instance.
# Both the cluster URL and the admin API key are read from environment
# variables, so no secret is written into the notebook itself.
admin_credentials = weaviate.auth.AuthApiKey(os.getenv("WORKSHOP_DEMO_KEY_ADMIN"))

client = weaviate.connect_to_wcs(
    cluster_url=os.getenv("WORKSHOP_DEMO_URL"),
    auth_credentials=admin_credentials,
)

# Bare last expression: the notebook displays the result of the readiness check
client.is_ready()

## Load the data file

In [None]:
import json

# Read the whole dataset into memory as a list of items.
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open("./questions-with-vectors.json", encoding="utf-8") as file:
    data_1k = json.load(file)

# Preview the first item to see which fields each record carries
print(json.dumps(data_1k[0], indent=2))

## Create a new collection with a vectorizer

In [None]:
from weaviate.classes.config import Configure, Property, DataType

# Start from a clean slate: remove the collection before (re)creating it
client.collections.delete("Questions")

# Schema of the collection (Optional).
# "category" and "round" are flagged with skip_vectorization=True.
question_properties = [
    Property(name="question", data_type=DataType.TEXT),
    Property(name="answer", data_type=DataType.TEXT),
    Property(name="category", data_type=DataType.TEXT, skip_vectorization=True),
    Property(name="round", data_type=DataType.TEXT, skip_vectorization=True),
    Property(name="points", data_type=DataType.NUMBER),
    Property(name="airDate", data_type=DataType.DATE),
]

# Create a collection here - with text2vec_palm as a vectorizer
client.collections.create(
    name="Questions",

    # TODO: configure the text2vec_palm with the "embedding-001" model
    # vectorizer_config=Configure.Vectorizer.text2vec_palm

    properties=question_properties,
)

## Batch import data
[Weaviate Docs - Batch import data](https://weaviate.io/developers/weaviate/manage-data/import)

In [None]:
print(f"Importing {len(data_1k)} data items.")

counter = 0

questions = client.collections.get("Questions")

# Fixed-size batching: objects are sent in batches of 100, with up to
# 2 concurrent requests. The batch is used as a context manager so that
# everything queued inside the block belongs to this import run.
with questions.batch.fixed_size(batch_size=100, concurrent_requests=2) as batch:
    for item in data_1k:
        # Each item carries its own vector under item["vector"]["default"]
        vector = item["vector"]["default"]
        data_to_insert = {
            "round": item["round"],
            "points": item["points"],
            "answer": item["answer"],
            "airDate": item["airDate"],
            "question": item["question"],
            "category": item["category"],
        }

        # Queue the object's properties together with its vector
        batch.add_object(properties=data_to_insert, vector=vector)

        # Progress message every 100 queued items
        counter += 1
        if counter % 100 == 0:
            print(f"Importing {counter}")

In [None]:
# Check for errors at the end of the import.
# failed_objects is read once so every message below reports the same snapshot.
failed_objects = questions.batch.failed_objects

if len(failed_objects) == 0:
    print("Import complete - no errors")
else:
    print(f"Import complete - with errors {len(failed_objects)}")
    # f-string formatting: the entry is not a plain string, so "+" concatenation
    # would raise TypeError exactly when there is an error to report.
    print(f"Last Error: {failed_objects[-1]}")

# Report only the items that did not fail, rather than the full input size
print(f"Imported {len(data_1k) - len(failed_objects)} items.")

In [None]:
# TODO: run aggregate on questions

## Preview data with vectors

In [None]:
# TODO: fetch one object with the vector
# TODO: print properties and the vector

## Close the client when done

In [None]:
# Close the client created in the "Connect" section
client.close()