In [None]:
! pip install weaviate-client fastembed

In [None]:
import os
import csv
import weaviate
import json
from weaviate.connect import ConnectionParams
from fastembed import TextEmbedding
from typing import List
import numpy as np

In [None]:
# Load the book catalogue: every CSV row becomes a dict keyed by the header names.
# The context manager closes the file handle as soon as the rows are read, and
# newline='' lets the csv module handle line breaks inside quoted fields itself.
with open('/usr/local/dataset/dataset.csv', newline='') as dataset_file:
    books = list(csv.DictReader(dataset_file))

# Description text of every book, in the same order as `books`.
descriptions = [doc["description"] for doc in books]

# fastembed model used to turn text into vectors.
embedding_model = TextEmbedding(model_name="BAAI/bge-small-en")

In [None]:
# API-key credentials; the key is read from the APIKEY environment variable
# so that it never appears in the notebook itself.
auth_config = weaviate.auth.AuthApiKey(api_key=os.getenv("APIKEY"))

# Plain (non-TLS) HTTP and gRPC endpoints of the Weaviate instance.
# Ports are passed as integers, which is the type from_params() declares.
client = weaviate.WeaviateClient(
    connection_params=ConnectionParams.from_params(
        http_host="weaviate.weaviate",
        http_port=80,
        http_secure=False,
        grpc_host="weaviate-grpc.weaviate",
        grpc_port=50051,
        grpc_secure=False,
    ),
    auth_client_secret=auth_config
)
# The client is not usable until connect() has been called.
client.connect()

Create or recreate a collection "Book":

In [None]:
# Start from a clean slate: drop any existing "Book" collection, then create it
# again without a vectorizer (vectors are supplied explicitly on insert).
collections_api = client.collections
if collections_api.exists("Book"):
    collections_api.delete("Book")
collection = collections_api.create(name="Book", vectorizer_config=None)

Insert data into the Weaviate collection:

In [None]:
# Embed every description in one pass instead of calling the model once per
# book; embed() yields one vector per input text, in input order, so zipping
# with `books` keeps each vector aligned with its row.
book_vectors = embedding_model.embed([doc["description"] for doc in books])

with collection.batch.dynamic() as batch:
    for i, (doc, vector) in enumerate(zip(books, book_vectors)):  # Batch import data
        print(f"importing book: {i+1}")
        batch.add_object(
            properties=doc,
            # Plain list of floats, the same form handle_query sends for searches.
            vector=vector.tolist()
        )

# The batch context manager does not raise on per-object failures; they are
# collected on the collection's batch object, so surface them explicitly.
failed_imports = collection.batch.failed_objects
if failed_imports:
    print(f"failed to import {len(failed_imports)} of {len(books)} books")
    print(f"first failure: {failed_imports[0]}")

Define the Weaviate query function. This function runs a vector search query and displays results.

It prints each result, followed by a line of dashes, in the following format:

- Title: Title of the book
- Author: Author of the book
- Publish date: Book publication date
- Description: As stored in your document's description metadata field

In [None]:
def handle_query(query, limit):
    """Run a vector search for ``query`` and print the matching books.

    The query text is embedded with ``embedding_model`` and the resulting
    vector is sent to ``collection`` as a near-vector search that returns at
    most ``limit`` objects.

    For every returned object this prints one line with the title, author and
    publish date, then the description, then a line of dashes. Nothing is
    returned.
    """
    header_template = "Title: {}, Author: {}, Publish date: {}"

    # embed() is handed a one-element list; its first embedding is the query vector.
    embeddings = [*embedding_model.embed([query])]
    response = collection.query.near_vector(
        near_vector=embeddings[0].tolist(),
        limit=limit,
    )

    for match in response.objects:
        props = match.properties
        print(header_template.format(props["title"], props["author"], props["publishDate"]))
        print(props["description"])
        print("---------")

Run the query `drama about people and unhappy love`:

In [None]:
# Show the two books whose descriptions are closest to the query text.
handle_query(query="drama about people and unhappy love", limit=2)