# Initial Setup

## Install Weaviate Python Client v4
> This notebook was created with Weaviate `1.24` and the Weaviate Client `4.5`

Run the command below to install the latest version of the Weaviate Python Client v4.

In [None]:
!pip install -U weaviate-client

## Deploy Weaviate

Weaviate offers 3 deployment options:
* Embedded
* Self-hosted - with Docker Compose
* Cloud deployment - [Weaviate Cloud Service](https://console.weaviate.cloud/)

# Time to Build

## Connect to Weaviate

* If you are new to OpenAI, register at [https://platform.openai.com](https://platform.openai.com/) and head to [https://platform.openai.com/api-keys](https://platform.openai.com/api-keys) to create your API key.
* If you are new to Cohere, register at [https://cohere.com](https://cohere.com) and head to [https://dashboard.cohere.com/api-keys](https://dashboard.cohere.com/api-keys) to create your API key.

In [None]:
import weaviate, os, json

# Connect with Weaviate Embedded
# client = weaviate.connect_to_embedded(
#     version="1.24.4",
#     headers={
#         "X-OpenAI-Api-Key": os.getenv("OPENAI_API_KEY"), # Replace with your inference API key
#         # "X-Cohere-Api-Key": os.getenv("COHERE_API_KEY"), # Replace with your inference API key
#     })

# Connect to a cloud instance of Weaviate (with WCS)
# client = weaviate.connect_to_wcs(
#     cluster_url=os.getenv("WCS_MM_DEMO_URL"),
#     auth_credentials=weaviate.auth.AuthApiKey(os.getenv("WCS_MM_DEMO_KEY")),
#     headers={
#         "X-OpenAI-Api-Key": os.getenv("OPENAI_API_KEY"), # Replace with your inference API key
#         "X-Cohere-Api-Key": os.getenv("COHERE_API_KEY"), # Replace with your inference API key
#     }
# )

# Connect to the local instance deployed with Docker Compose.
#
# The inference API keys are read from environment variables. os.getenv()
# returns None for a variable that is not set, and None is not a usable header
# value, so only the keys that are actually configured (non-empty) are
# forwarded. Set just the variables for the providers you intend to use.
client = weaviate.connect_to_local(
    headers={
        header: value
        for header, value in {
            "X-OpenAI-Api-Key": os.getenv("OPENAI_API_KEY"),
            "X-Cohere-Api-Key": os.getenv("COHERE_API_KEY"),
            "X-AWS-Access-Key": os.getenv("AWS_ACCESS_KEY"),
            "X-AWS-Secret-Key": os.getenv("AWS_SECRET_KEY"),
        }.items()
        if value
    }
)

# Last expression of the cell, so its result is shown as the cell output.
client.is_ready()

In [None]:
import weaviate, os, json

# Connect to a cloud instance of Weaviate (with WCS).
#
# Like the local-connection cell above, this cell assigns `client`; running it
# replaces whatever `client` was bound to before.
#
# The cluster URL, the admin API key and the inference API keys are read from
# environment variables. os.getenv() returns None for a variable that is not
# set, and None is not a usable header value, so only the inference keys that
# are actually configured (non-empty) are forwarded.
client = weaviate.connect_to_wcs(
    cluster_url=os.getenv("WORKSHOP_DEMO_URL"),
    auth_credentials=weaviate.auth.AuthApiKey(os.getenv("WORKSHOP_DEMO_KEY_ADMIN")),
    headers={
        header: value
        for header, value in {
            "X-OpenAI-Api-Key": os.getenv("OPENAI_API_KEY"),
            "X-Cohere-Api-Key": os.getenv("COHERE_API_KEY"),
            "X-AWS-Access-Key": os.getenv("AWS_ACCESS_KEY"),
            "X-AWS-Secret-Key": os.getenv("AWS_SECRET_KEY"),
        }.items()
        if value
    },
)

# Last expression of the cell, so its result is shown as the cell output.
client.is_ready()

## Create a collection
[Weaviate Docs - collection creation and configuration](https://weaviate.io/developers/weaviate/configuration/schema-configuration)

In [None]:
from weaviate.classes.config import Configure

# Start from a clean slate: if a "Jeopardy" collection already exists, delete it
# first so this cell can be re-run.
if client.collections.exists("Jeopardy"):
    client.collections.delete("Jeopardy")

# Create the collection - with OpenAI as the vectorizer (text2vec_openai)
client.collections.create(
    name="Jeopardy",
    vectorizer_config=Configure.Vectorizer.text2vec_openai()
)

In [None]:
from weaviate.classes.config import Configure

# Start from a clean slate: if a "Jeopardy" collection already exists (for
# example from the previous cell), delete it before re-creating it.
if client.collections.exists("Jeopardy"):
    client.collections.delete("Jeopardy")

# Create the collection. Exactly one `vectorizer_config` option should be active;
# the others are kept commented out as alternatives.
client.collections.create(
    name="Jeopardy",

    # Option 1 (active) - Use Cohere as the vectorizer
    vectorizer_config=Configure.Vectorizer.text2vec_cohere(),

    # Option 2 - Use Cohere embedding model through AWS Bedrock
    # vectorizer_config=Configure.Vectorizer.text2vec_aws(
    #     model="cohere.embed-english-v3",
    #     region="us-east-1"
    # ),

    # Option 3 - Use Titan Embed model
    # vectorizer_config=Configure.Vectorizer.text2vec_aws(
    #     model="amazon.titan-embed-text-v1",
    #     # region="eu-central-1",
    #     region="us-east-1",
    # ),
)

## Import data

### Sample Data

In [None]:
import json
# Load the small sample data set. The context manager closes the file handle as
# soon as parsing is done, and the explicit encoding keeps the read independent
# of the platform's default text encoding.
with open("./jeopardy_tiny.json", encoding="utf-8") as sample_file:
    data_10 = json.load(sample_file)

# Pretty-print the loaded objects so the structure of the data is visible.
print(json.dumps(data_10, indent=2))

### Insert Many
[Weaviate Docs - insert many](https://weaviate.io/developers/weaviate/manage-data/import)

In [None]:
# Insert data
jeopardy = client.collections.get("Jeopardy")
insert_result = jeopardy.data.insert_many(data_10)

# insert_many reports per-object failures on the returned result
# (`has_errors` / `errors`, keyed by the object's position in the input), so
# check it explicitly instead of relying on reading the raw return value.
if insert_result.has_errors:
    print("Import complete with errors")
    for index, err in insert_result.errors.items():
        print(index, err)
else:
    print("Import complete with no errors")

### Data preview

In [None]:
# Show data preview: fetch up to 4 objects from the "Jeopardy" collection
jeopardy = client.collections.get("Jeopardy")
response = jeopardy.query.fetch_objects(limit=4)

# Print each returned object's UUID followed by its properties
for item in response.objects:
    print(item.uuid, item.properties)

In [None]:
# Show data preview - with vectors
# Same fetch as the plain preview, but with include_vector=True so that
# `item.vector` can be printed alongside the properties.
jeopardy = client.collections.get("Jeopardy")
response = jeopardy.query.fetch_objects(
    limit=4,
    include_vector=True
)

# Print each object's properties, then its vector, followed by a blank line
for item in response.objects:
    print(item.properties)
    print(item.vector, '\n')

### Super quick query example

In [None]:
# Run a near_text query against the collection and keep the top 2 results.
# `jeopardy` is the collection handle assigned in the data-preview cells above.
# To try one of the alternative queries, swap it in for the active `query=` line.
# NOTE(review): "Afrikan" may be a deliberate misspelling to show the search
# tolerates typos - confirm before "correcting" it.
response = jeopardy.query.near_text(
    # query="Zwierzęta afrykańskie",  # "African animals" in Polish
    # query="アフリカの動物",  # "African animals" in Japanese
    query="Afrikan animals",
    limit=2
)

# Print the properties of each returned object
for item in response.objects:
    print(item.properties)

## Create a collection with a Generative module

In [None]:
# new collection with 1k objects and OpenAI vectorizer and generative model

from weaviate.classes.config import Configure, Property, DataType

# Start from a clean slate: if a "Questions" collection already exists, delete
# it first so this cell can be re-run.
if client.collections.exists("Questions"):
    client.collections.delete("Questions")

# Create the collection - with OpenAI as both the vectorizer (text2vec_openai)
# and the generative module (model "gpt-4")
client.collections.create(
    name="Questions",
    vectorizer_config=Configure.Vectorizer.text2vec_openai(),
    generative_config=Configure.Generative.openai(model="gpt-4"),

    properties=[  # Define properties (Optional)
        Property(name="question", data_type=DataType.TEXT),
        Property(name="answer", data_type=DataType.TEXT),
        # category and round are flagged with skip_vectorization=True
        Property(name="category", data_type=DataType.TEXT, skip_vectorization=True),
        Property(name="round", data_type=DataType.TEXT, skip_vectorization=True),
        Property(name="points", data_type=DataType.NUMBER),
        Property(name="airDate", data_type=DataType.DATE),
    ],
)

In [None]:
# Disabled example (the whole cell is commented out): a collection with two
# vector configurations, "question-vector" and "long-vector", each built from a
# different set of source properties.
# NOTE(review): it re-uses the name "Jeopardy" without the exists()/delete()
# guard used by the other creation cells, so it would collide with the
# collection created earlier - confirm the intended name before enabling.
# NOTE(review): named vectors are presumably configured through
# Configure.NamedVectors.* in the v4 client rather than Configure.Vectorizer.*
# - verify against the client docs before enabling.

# from weaviate.classes.config import Property, Configure, DataType

# client.collections.create(
#     name="Jeopardy",

#     vectorizer_config=[
#         Configure.Vectorizer.text2vec_openai(
#             name="question-vector",
#             source_properties=["question"]
#         ),

#         Configure.Vectorizer.text2vec_openai(
#             name="long-vector",
#             source_properties=["question", "answer", "category"]
#         ),
#     ],

#     properties=[  # Define properties (Optional)
#         Property(name="question", data_type=DataType.TEXT),
#         Property(name="answer", data_type=DataType.TEXT),
#         Property(name="category", data_type=DataType.TEXT, skip_vectorization=True),
#         Property(name="round", data_type=DataType.TEXT, skip_vectorization=True),
#         Property(name="points", data_type=DataType.NUMBER),
#         Property(name="airDate", data_type=DataType.DATE),
#     ],

# )

### Import data - 1k objects

In [None]:
import json
# Load the 1k-object data set. The context manager closes the file handle as
# soon as parsing is done, and the explicit encoding keeps the read independent
# of the platform's default text encoding.
with open("./jeopardy_1k.json", encoding="utf-8") as dataset_file:
    data_1k = json.load(dataset_file)

# Show the object count and a short preview instead of dumping every object
# into the cell output.
# Assumes the file holds a JSON array (the import cell below iterates over it).
print(len(data_1k), "objects loaded")
print(json.dumps(data_1k[:3], indent=2))

In [None]:
# Import the 1k objects into the "Questions" collection using a dynamic batch
questions = client.collections.get("Questions")

# Each object is queued on the batch inside the context manager
with questions.batch.dynamic() as batch:
    for item in data_1k:
        batch.add_object(item)

# Report the outcome: an empty `failed_objects` means nothing was rejected;
# otherwise print every failed entry.
if not questions.batch.failed_objects:
    print("Import complete with no errors")
else:
    print("Import complete with errors")
    for err in questions.batch.failed_objects:
        print(err)

# Alternative for small data sets:
# questions.data.insert_many(data_1k)