In [12]:
import os
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient, models
from langchain_openai import OpenAIEmbeddings
from uuid import uuid4
import json
from langchain_core.documents import Document
from dotenv import load_dotenv

load_dotenv()

True

In [5]:
# initialize the embeddings model
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
    dimensions=1024
)

In [14]:
# initialize the qdrant vector store
EMBEDDING_VECTOR_SIZE = 1024
qdrant_client = QdrantClient(
    url=os.getenv("qdrant_endpoint"),
    api_key=os.getenv("qdrant_api_key"),
    https=False,
    timeout=600,
)

qdrant_client.create_collection(
    collection_name="chai-docs",
    vectors_config=models.VectorParams(
        size=EMBEDDING_VECTOR_SIZE, distance=models.Distance.COSINE
    ),
)

qdrant = QdrantVectorStore(
    client=QdrantClient(
        url=os.getenv("qdrant_endpoint"),
        api_key=os.getenv("qdrant_api_key"),
        https=False,
        timeout=600,
    ),
    collection_name="chai-docs",
    embedding=OpenAIEmbeddings(model="text-embedding-3-large", dimensions=EMBEDDING_VECTOR_SIZE),
)

In [7]:
# load the web pages content
with open("web_pages_content_processed.json", "r") as f:
    web_pages_content = json.load(f)

In [15]:
# create documents to be embedded and uploaded to qdrant
documents = []
for web_page in web_pages_content:
    documents.append(
        Document(
            page_content=web_pages_content[web_page],
            metadata={
                "web_url": web_page
            },
        )
    )

In [16]:
uuids = [str(uuid4()) for _ in range(len(documents))]
print(len(uuids))
qdrant.add_documents(documents=documents, ids=uuids)

44


['3204e5ce-b3d1-46a3-8e18-1cafebfaaa5e',
 '26dfafed-f5f2-4122-a425-44b5d9ce313b',
 'dd232d79-3d97-4d5a-a996-d297799ca692',
 '44fa83fe-14b3-4512-831d-289d444fa3f0',
 '67b96e47-1876-453b-9695-0ef5ffaa999c',
 '9ebeb817-77f4-481f-87d4-0b8887a0f3bf',
 '71171a80-9bf6-4258-87ad-38993a710f1a',
 'a488178e-4e04-4040-9243-b712ef932b55',
 '3e7f8edb-a0f7-4f20-af17-4573b2d7733d',
 '2021da76-1626-457e-953f-bc8ee2a5e696',
 '36f92378-d435-434e-b526-45628061c7ed',
 '8b709549-3490-47d2-9815-ab55de314bd0',
 '97ab6f20-bbc0-44b5-84f1-49b886f0e5d2',
 'e2dc15c4-586b-43fe-ac3a-83865333de07',
 '81a37b9e-a604-470f-87d7-edef2aa71f5d',
 'b8c58902-aeec-4836-b76e-f03fbd7acbd1',
 'bd9ee4b8-b9c1-470a-8fc0-a22d91003829',
 'deaffd78-d353-4b4d-abb2-f7abf720e221',
 '1f46dd80-2321-41a7-85c6-fc184754dec6',
 '52485d4b-83b8-4a89-80be-cea89da91649',
 'b1371d5f-8c4f-47de-84dc-703ee790ff64',
 '94870873-1ab6-4d50-ac67-5effd423fc18',
 'c8e05791-3d69-4585-9d84-358cd8927901',
 'a730025f-20e0-49c8-a94d-e635d434f984',
 '14256bdc-06e1-