## Connect to Weaviate

### Connect with WCS
> If you have a [WCS](https://console.weaviate.cloud/) instance, you can connect by providing your `WCS cluster URL` and `cluster API key`.

> You also need to provide your OpenAI API key (note: this demo works with all Weaviate vectorizers).

In [1]:
import weaviate, os, json

# Connect to your WCS cluster.
# Every setting is read with os.environ[...] so that an unset variable fails
# right here with a KeyError naming it, instead of handing None to the client.
client = weaviate.connect_to_wcs(
    cluster_url=os.environ["WORKSHOP_DEMO_URL"],
    auth_credentials=weaviate.auth.AuthApiKey(os.environ["WORKSHOP_DEMO_KEY_ADMIN"]),
    headers={
        # OpenAI API key passed along as a request header
        "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"],
    },
)

client.is_ready()

True

### Connect with Docker
> You can also run this project with Docker Compose. See the provided `docker-compose.yml` file.

In [None]:
import weaviate, os, json

# Open a client against the local Weaviate instance, sending the OpenAI key
# from the environment as a request header.
openai_headers = {"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]}
client = weaviate.connect_to_local(headers=openai_headers)

client.is_ready()

## Create a collection
> A collection stores your data and vector embeddings.

In [2]:
# Note: in practice, you shouldn't rerun this cell, as it deletes your data
# in "Products", and then you need to re-import it again.
import weaviate.classes.config as wc

# Single place where the collection name used by this cell is defined
COLLECTION_NAME = "Products"

# Delete the collection if it already exists
if client.collections.exists(COLLECTION_NAME):
    client.collections.delete(COLLECTION_NAME)

client.collections.create(
    name=COLLECTION_NAME,

    properties=[  # defining properties (data schema) is optional
        wc.Property(name="name", data_type=wc.DataType.TEXT),
        # skip_vectorization=True marks these properties to be left out of vectorization
        wc.Property(name="category", data_type=wc.DataType.TEXT, skip_vectorization=True),
        wc.Property(name="sub_category", data_type=wc.DataType.TEXT, skip_vectorization=True),
        wc.Property(name="ratings", data_type=wc.DataType.NUMBER),
    ],

    # add vectorizer
    vectorizer_config=wc.Configure.Vectorizer.text2vec_openai(),
    # add generator
    generative_config=wc.Configure.Generative.openai("gpt-4"),
)

print(f"Successfully created collection: {COLLECTION_NAME}.")

Successfully created collection: Products.


## Import the Data

In [3]:
import csv

def parse_rating(r):
    """Convert a raw CSV rating value to a float.

    Args:
        r: Raw value from the "ratings" column. Normally a string, but
            csv.DictReader yields None for fields missing from a short row.

    Returns:
        The rating as a float, or None when the value is missing or is not
        a valid number (e.g. "" or non-numeric text).
    """
    try:
        return float(r)
    except (TypeError, ValueError):
        # TypeError: r is None or another non-numeric type;
        # ValueError: r is text that does not parse as a number.
        return None

def read_data(file_url, encoding="utf-8"):
    """Read a product CSV file into a list of property dicts.

    Args:
        file_url: Path of the CSV file to open. The file needs a header row
            containing the columns "name", "main_category", "sub_category"
            and "ratings".
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        A list with one dict per CSV row, keyed by "name", "category",
        "sub_category" and "ratings" (the value returned by parse_rating).

    Raises:
        KeyError: If a row lacks one of the expected columns.
    """
    # newline="" is what the csv module asks for, so that quoted fields
    # containing line breaks are parsed correctly.
    with open(file_url, newline="", encoding=encoding) as csvfile:
        reader = csv.DictReader(csvfile, delimiter=",", quotechar='"')
        return [
            {
                "name": row["name"],
                # the CSV calls this column "main_category"
                "category": row["main_category"],
                "sub_category": row["sub_category"],
                "ratings": parse_rating(row["ratings"]),
            }
            for row in reader
        ]

# Parse each category's CSV file into a list of product dicts
cycling_data = read_data("data/Cycling.csv")
running_data = read_data("data/Running.csv")
yoga_data = read_data("data/Yoga.csv")

# Preview the first parsed record of every dataset as a sanity check
for dataset in (cycling_data, running_data, yoga_data):
    print(json.dumps(dataset[0], indent=2))

{
  "name": "QUXIS Portable High Pressure Foot Air Pump Heavy Compressor Cylinder with Pressure Gauge Floor Pump for Motorbike, for Car...",
  "category": "sports & fitness",
  "sub_category": "Cycling",
  "ratings": 4.0
}
{
  "name": "Amazon Brand - Symactive Men's Regular Fit T-Shirt",
  "category": "sports & fitness",
  "sub_category": "Running",
  "ratings": 4.0
}
{
  "name": "Dr Trust TPE Premium Luxfoam Yoga Mat for Gym Workout and Flooring Exercise Yoga Mat for Men and Women Fitness - 317",
  "category": "sports & fitness",
  "sub_category": "Yoga",
  "ratings": 4.4
}


In [4]:
products = client.collections.get("Products")

# Datasets to import, keyed by the label used in the progress messages
datasets = {
    "cycling": cycling_data,
    "running": running_data,
    "yoga": yoga_data,
}

for label, rows in datasets.items():
    print(f"loading {label} data")
    result = products.data.insert_many(rows)
    # Per-object failures are reported in the returned object's `errors`
    # mapping; surface them instead of discarding the result silently.
    if result.errors:
        print(f"  {len(result.errors)} of {len(rows)} {label} objects failed to import")

print("Import complete!")

loading cycling data
loading running data
loading yoga data
Import complete!


In [5]:
# Aggregate over the whole collection; the displayed result reports its total object count
products.aggregate.over_all()

AggregateReturn(properties={}, total_count=3168)