In [43]:
import os
import weaviate
import pandas as pd
from weaviate.classes.init import Auth
from weaviate.util import generate_uuid5
import weaviate.classes as weaviate_classes

In [44]:
weaviate_url = os.environ["WEAVIATE_URL"]
weaviate_api_key = os.environ["WEAVIATE_API_KEY"]
cohere_api_key = os.environ["COHERE_APIKEY"]

# Connect to Weaviate Cloud
client = weaviate.connect_to_wcs(
    cluster_url=weaviate_url,
    auth_credentials=Auth.api_key(weaviate_api_key),
    headers={
        "X-Cohere-Api-Key": cohere_api_key
    }
)

print(client.is_ready())

True


In [45]:
# CREATE AN OBJECT COLLECTION

client.collections.create(
    # Set the name of the collection
    name="Movie",

    # Set modules to be used
    vectorizer_config=weaviate_classes.config.Configure.Vectorizer.text2vec_cohere(),    # Set the vectorizer module
    generative_config=weaviate_classes.config.Configure.Generative.cohere(),             # Set the generative module
    # Note: Could also explicitly set the model, e.g.:
    # generative_config=weaviate_classes.config.Configure.Generative.openai(model="gpt-4-1106-preview"),

    # Define the properties of the collection
    properties=[
        weaviate_classes.config.Property(
            # Set the name of the property
            name="title",
            # Set the data type of the property
            data_type=weaviate_classes.config.DataType.TEXT,
        ),
        weaviate_classes.config.Property(
            name="description",
            data_type=weaviate_classes.config.DataType.TEXT,
        ),
        weaviate_classes.config.Property(
            name="movie_id",
            data_type=weaviate_classes.config.DataType.INT,
        ),
        weaviate_classes.config.Property(
            name="year",
            data_type=weaviate_classes.config.DataType.INT,
        ),
        weaviate_classes.config.Property(
            name="rating",
            data_type=weaviate_classes.config.DataType.NUMBER,
        ),
        weaviate_classes.config.Property(
            name="director",
            data_type=weaviate_classes.config.DataType.TEXT,
            skip_vectorization=True,
        ),
    ],
)

<weaviate.collections.collection.sync.Collection at 0x2bf9e4f5c10>

In [46]:
# DATA LOADING IN DATAFRAME

movies_df = pd.read_csv('movies_data.csv')
movies_df.head()

Unnamed: 0,ID,Movie Title,Description,Director,Star Rating,Critic Review 1,Critic Review 2,Critic Review 3,Synopsis,Year
0,0,Arctic Chuckles,Penguins trying stand-up comedy to uplift spir...,Sofia Mendoza,4.0,An endearing and hilarious animation that both...,Mendoza showcases that humor is truly universa...,"Pure joy from start to finish, it's the feel-g...","In the heart of the frosty Arctic, where the s...",1974
1,1,Ballad of the Lonely Lighthouse,A reclusive lighthouse keeper's life is illumi...,Dmitri Ivanov,4.9,Ivanov’s storytelling brilliance shines as bri...,"A touching tale of isolation, connection, and ...","Between the vast sea and towering lighthouse, ...","In a remote coastal town, atop a craggy cliff ...",1963
2,2,Ballet & Bullets,An unexpected crime comedy about a mob boss wh...,Olga Ivanov,4.1,A dance of hilarity and unexpected twists. Iva...,"It's 'The Godfather' meets 'Billy Elliot', wit...",This ballet of bullets keeps you twirling betw...,"In the underbelly of New York City, Vincent ""V...",1995
3,3,Bazaar of Bewildering Bees,A beekeeper discovers her bees produce honey t...,Nia Jones,4.6,"Jones buzzes with creativity, crafting a world...","An un-bee-lievably heartwarming hive of humor,...",This bazaar isn't just bustling; it's a bee-au...,"Amidst the verdant landscapes of Hawaii, with ...",1991
4,4,Beneath Northern Lights,An arctic explorer finds a hidden village wher...,Magnus Olsson,4.6,Olsson paints a breathtaking icy tableau fille...,A chilling tale of wonder beneath the dance of...,From the icy landscapes to the warm village ta...,"In the remote expanses of the Arctic, Viktor L...",1988


In [65]:
movies = client.collections.get("Movie")   # Get the Movie collection

# print collection properties
print("Collection name: " + movies.name)


movie_objs = []

for i, row in movies_df.iterrows():
    properties={
        "title": row["Movie Title"],
        "description": row["Description"],
        "movie_id": row["ID"],
        "year": row["Year"],
        "rating": row["Star Rating"],
        "director": row["Director"]
    }

    movie_uuid = generate_uuid5(row["ID"])
    data_object = weaviate_classes.data.DataObject(
        properties=properties,
        uuid=movie_uuid
    )
    movie_objs.append(data_object)

response = movies.data.insert_many(movie_objs)
print(f"Insertion complete with {len(response.all_responses)} objects.")
print(f"Insertion errors: {len(response.errors)}.")

Collection name: Movie


WeaviateInsertManyAllFailedError: Every object failed during insertion. Here is the set of all errors: failed with status: 401 error: invalid api token