In [None]:
import os
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file
# Google AI Studio API key, read from the environment (populated from .env above).
AI_STUDIO_KEY = os.getenv("GOOGLE_AI_STUDIO_KEY")
# Report only whether a key was found: echoing the key itself would store the
# secret in the notebook's saved cell output.
print("GOOGLE_AI_STUDIO_KEY loaded:", bool(AI_STUDIO_KEY))

## Connect to Weaviate

### Connect with WCS
> If you have a [WCS](https://console.weaviate.cloud/) instance, you can connect by providing your `WCS cluster URL` and `cluster API key`.

> You also need to provide your Google AI Studio API key (note: this demo works with all Weaviate vectorizers).

In [None]:
import json
import os

import weaviate

# Cluster URL and admin API key for the Weaviate Cloud Services (WCS) cluster
# are read from environment variables.
wcs_cluster_url = os.getenv("WORKSHOP_DEMO_URL")
wcs_credentials = weaviate.auth.AuthApiKey(os.getenv("WORKSHOP_DEMO_KEY_ADMIN"))

# The Google API key is sent along as a request header.
client = weaviate.connect_to_wcs(
    cluster_url=wcs_cluster_url,
    auth_credentials=wcs_credentials,
    headers={"X-Google-Api-Key": AI_STUDIO_KEY},
)

# Last expression of the cell: displays the readiness check result.
client.is_ready()

### Connect with Docker
> You can also run this project with Docker Compose. See the provided docker-compose.yml file.

In [None]:
import weaviate, os, json

# Connect to your local Weaviate instance (e.g. started with Docker Compose).
# The "Products" collection is vectorized through the Google AI Studio
# endpoint, so forward the Google API key -- the same header the WCS
# connection uses -- instead of an unrelated OpenAI key.
client = weaviate.connect_to_local(
    headers={
      "X-Google-Api-Key": AI_STUDIO_KEY,
    }
)

# Last expression of the cell: displays the readiness check result.
client.is_ready()

## Create a collection
> A collection stores your data and vector embeddings.

In [None]:
# Caution: re-running this cell drops the existing "Products" collection,
# so its data has to be imported again afterwards.
from weaviate.classes.config import Configure, DataType, Property

# Start from a clean slate: remove any earlier version of the collection.
if client.collections.exists("Products"):
    client.collections.delete("Products")

# Data schema. Declaring properties is optional.
# "category" and "sub_category" are flagged to be skipped during vectorization.
product_properties = [
    Property(name="name", data_type=DataType.TEXT),
    Property(name="category", data_type=DataType.TEXT, skip_vectorization=True),
    Property(name="sub_category", data_type=DataType.TEXT, skip_vectorization=True),
    Property(name="ratings", data_type=DataType.NUMBER),
]

# text2vec-palm vectorizer pointed at the generativelanguage.googleapis.com endpoint.
palm_vectorizer = Configure.Vectorizer.text2vec_palm(
    model_id="embedding-001",
    project_id="-",
    api_endpoint="generativelanguage.googleapis.com",
)

client.collections.create(
    name="Products",
    properties=product_properties,
    vectorizer_config=palm_vectorizer,
)

print("Successfully created collection: Products.")

## Import the Data

In [None]:
import csv

def parse_rating(r):
    """Convert a raw rating value to a float.

    Args:
        r: Raw rating value, typically the text of a CSV cell. May be None
            when the CSV row is shorter than the header.

    Returns:
        The rating as a float, or None when ``r`` cannot be converted
        (non-numeric text, an empty string, or a missing value).
    """
    try:
        return float(r)
    except (TypeError, ValueError):
        # ValueError: non-numeric or empty text.
        # TypeError: None (csv.DictReader fills missing fields with None).
        return None

def read_data(file_url):
    """Load product records from a CSV file.

    The file must have a header row containing at least the columns
    ``name``, ``main_category``, ``sub_category`` and ``ratings``.

    Args:
        file_url: Path of the CSV file to read.

    Returns:
        A list with one dict per CSV row, with keys ``name``, ``category``
        (taken from ``main_category``), ``sub_category`` and ``ratings``
        (converted with ``parse_rating``).

    Raises:
        KeyError: If one of the expected columns is missing from the header.
        OSError: If the file cannot be opened.
    """
    # Decode explicitly instead of relying on the platform default encoding.
    # "utf-8-sig" reads plain UTF-8 and also strips a leading byte-order mark,
    # which would otherwise be glued onto the first column name.
    with open(file_url, newline="", encoding="utf-8-sig") as csvfile:
        reader = csv.DictReader(csvfile, delimiter=",", quotechar='"')
        return [
            {
                "name": row["name"],
                "category": row["main_category"],
                "sub_category": row["sub_category"],
                "ratings": parse_rating(row["ratings"]),
            }
            for row in reader
        ]

# Read each product category from its own CSV file.
cycling_data = read_data("data/Cycling.csv")
running_data = read_data("data/Running.csv")
yoga_data = read_data("data/Yoga.csv")

# Preview the first record of every dataset as pretty-printed JSON.
for dataset in (cycling_data, running_data, yoga_data):
    print(json.dumps(dataset[0], indent=2))

In [None]:
products = client.collections.get("Products")

# insert_many does not raise when individual objects fail; failures are
# reported on the returned result, so inspect it instead of discarding it.
datasets = {
    "cycling": cycling_data,
    "running": running_data,
    "yoga": yoga_data,
}

for label, rows in datasets.items():
    print(f"loading {label} data")
    result = products.data.insert_many(rows)
    if result.has_errors:
        # result.errors maps the index of each failed object to its error.
        first_index = next(iter(result.errors))
        print(f"  WARNING: {len(result.errors)} of {len(rows)} {label} objects failed to import")
        print(f"  first error (row {first_index}): {result.errors[first_index].message}")

print("Import complete!")

In [None]:
# Aggregate over the whole "Products" collection. As the last expression in the
# cell, the result is displayed as output (presumably the object count -- verify).
products.aggregate.over_all()