In [1]:
import sys

from dotenv import load_dotenv

# Put the parent directory on the import path so that the `src` package
# imported by later cells can be resolved from this notebook's location.
sys.path.append('..')

# Load variables from a .env file into the process environment
# (presumably the OpenAI credentials needed by later cells -- TODO confirm).
# Left as the cell's last expression so its boolean result is displayed.
load_dotenv()

True

In [2]:
import os

from src.vector import create_file_content_map

# Gather the project's own Python sources: search ../src and accept only *.py.
# Later cells use this mapping's keys as source paths and its values as the
# texts to embed.
file_content_map = create_file_content_map(
    root_dir=os.path.join("..", "src"),
    accept_patterns=["*.py"],
)


INFO:faiss.loader:Loading faiss with AVX512 support.
INFO:faiss.loader:Could not load library with AVX512 support due to:
ModuleNotFoundError("No module named 'faiss.swigfaiss_avx512'")
INFO:faiss.loader:Loading faiss with AVX2 support.
INFO:faiss.loader:Successfully loaded faiss with AVX2 support.


Found 16 valid files


In [3]:
from src.vector import EmbeddingManager, OpenAIEmbeddingProvider, FAISSVectorStore, EmbeddingConfig

# Embedding configuration, kept together because the values are coupled:
# the vector store's dimension has to match the length of the vectors the
# embedding model returns. OpenAI's text-embedding-3-small produces
# 1536-dimensional vectors by default, so change both constants together.
EMBEDDING_MODEL_NAME = "text-embedding-3-small"
EMBEDDING_DIMENSION = 1536
FAISS_INDEX_PATH = "faiss_index.bin"

embedding_provider = OpenAIEmbeddingProvider(
    config=EmbeddingConfig(model_name=EMBEDDING_MODEL_NAME),
)

# NOTE(review): if FAISSVectorStore reloads an existing index from
# FAISS_INDEX_PATH, re-running the indexing cell may insert the same files
# again -- confirm against the store implementation.
vector_store = FAISSVectorStore(
    dimension=EMBEDDING_DIMENSION,
    index_path=FAISS_INDEX_PATH,
)

# Ties the embedding provider and the vector store together; later cells use
# it to add texts and to run similarity searches.
embedding_manager = EmbeddingManager(
    embedding_provider=embedding_provider,
    vector_store=vector_store,
)

In [4]:
texts = list(file_content_map.values())
metadatas = [{"source": file_path} for file_path in file_content_map.keys()]

await embedding_manager.add_texts(texts=texts, metadatas=metadatas)



INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [5]:
results = await embedding_manager.similarity_search(query="What is the purpose of the EmbeddingManager class?", limit=10)
results = sorted(results, key=lambda x: x.score, reverse=True)

for result in results:
    print(result.score)
    print(result.text[:10])
    print("-" * 100)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


1.8250832557678223
import tim
----------------------------------------------------------------------------------------------------
1.8232173919677734
from .watc
----------------------------------------------------------------------------------------------------
1.8073463439941406
import log
----------------------------------------------------------------------------------------------------
1.5399296283721924
from datac
----------------------------------------------------------------------------------------------------
1.4746506214141846
import os

----------------------------------------------------------------------------------------------------
1.443137526512146
import ric
----------------------------------------------------------------------------------------------------
1.1859211921691895
import asy
----------------------------------------------------------------------------------------------------
1.1800724267959595
from abc i
------------------------------------------------------

In [6]:
from langchain_openai import ChatOpenAI

from src.query import QueryProcessor, QueryType

# Build the query processor on top of the existing embedding manager, with a
# gpt-4o-mini chat model supplied as its LLM.
query_processor = QueryProcessor(
    embedding_manager=embedding_manager,
    llm=ChatOpenAI(model="gpt-4o-mini"),
)

In [7]:
query_results = await query_processor.process_query(query="What is the purpose of the VectorStore class?", query_type=QueryType.SEMANTIC)

query_results.analysis


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


AIMessage(content="### Analysis of the `VectorStore` Class and Related Sections\n\n#### 1. How each section relates to the query\n\nThe `VectorStore` class is a central component of the provided code, which deals with managing and querying vectors in a vector store. Here are the relevant sections:\n\n- **VectorStore Class**: This is an abstract base class that defines the interface for all vector stores. It specifies essential methods like `add_vectors`, `query`, `delete`, and `clear`, which are fundamental operations for managing vectors.\n\n- **Concrete Implementations**: The `ChromaVectorStore`, `FAISSVectorStore`, and `PGVectorStore` classes are concrete implementations of the `VectorStore` class. Each of these implementations provides specific functionalities and optimizations for storing and querying vectors using different underlying technologies (Chroma, FAISS, and PostgreSQL with PGVector, respectively).\n\n- **EmbeddingManager**: This class coordinates the embedding process a

In [15]:
# Save the analysis text to result.md. The encoding is set explicitly because
# LLM output can contain non-ASCII characters, and the platform default
# encoding (e.g. cp1252 on Windows) could fail or garble them.
with open("result.md", "w", encoding="utf-8") as f:
    f.write(query_results.analysis.content)

