In [1]:
from dotenv import load_dotenv
import sys

# The project's `src` package is imported by later cells; appending the parent
# directory to the module search path makes it resolvable from this notebook.
sys.path.append("..")

# Read variables from a .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key comes from - confirm.
# Left as the final expression so the cell displays whether a .env file was loaded.
load_dotenv()

True

In [2]:
from src.vector import EmbeddingManager, OpenAIEmbeddingProvider, FAISSVectorStore, EmbeddingConfig
from src.vector import create_file_content_map
import os

file_content_map = create_file_content_map(
    root_dir=os.path.join("..", "src", "vector"), accept_patterns=["*.py"])


embedding_provider = OpenAIEmbeddingProvider(
    config=EmbeddingConfig(model_name="text-embedding-3-small"))
vector_store = FAISSVectorStore(dimension=1536, index_path="faiss_index.bin")

embedding_manager = EmbeddingManager(
    embedding_provider=embedding_provider,
    vector_store=vector_store,
)


texts = list(file_content_map.values())
metadatas = [{"source": file_path} for file_path in file_content_map.keys()]

await embedding_manager.add_texts(texts=texts, metadatas=metadatas)

INFO:faiss.loader:Loading faiss with AVX512 support.
INFO:faiss.loader:Could not load library with AVX512 support due to:
ModuleNotFoundError("No module named 'faiss.swigfaiss_avx512'")
INFO:faiss.loader:Loading faiss with AVX2 support.
INFO:faiss.loader:Successfully loaded faiss with AVX2 support.


Found 6 valid files


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [3]:
from src.query import StreamQueryProcessor
from src.query.prompt import PromptType, PromptProviderType
from langchain_openai import ChatOpenAI

stream_query_processor = StreamQueryProcessor   (embedding_manager=embedding_manager,
                                              llm=ChatOpenAI(model="gpt-4o-mini"))


final_result = ""

async for result in stream_query_processor.stream_ananlysis(query="What is the purpose of the VectorStore class? and keep it short", prompt_type=PromptType.FILE_WISE, prompt_provider=PromptProviderType.SEMANTIC):
    final_result += str(result.content)


final_result

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


6
0 6


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


1 6
AnalysisBatch(files=[WindowsPath('C:/Users/Utkarsh Priyadarshi/WebstormProjects/gep/src/vector/manager.py'), WindowsPath('C:/Users/Utkarsh Priyadarshi/WebstormProjects/gep/src/vector/__init__.py')], contents=['\nFile: C:\\Users\\Utkarsh Priyadarshi\\WebstormProjects\\gep\\src\\vector\\manager.py\nContent:\n    import time\n    from typing import Optional\n    from .embedding import EmbeddingProvider\n    from .vector import VectorStore, EmbeddingVector, SearchResult\n        class EmbeddingManager:\n        def __init__(self, embedding_provider: EmbeddingProvider, vector_store: VectorStore, batch_size: int = 100) -> None:\n        self.embedding_provider = embedding_provider\n            self.batch_size = batch_size\n            self.vector_store = vector_store\n            async def add_texts(self, texts: list[str], metadatas: Optional[list[dict]] = None) -> None:\n        try:\n        embeddings = await self.embedding_provider.embed_documents(texts)\n        vectors = []\n      

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


3 6
AnalysisBatch(files=[WindowsPath('C:/Users/Utkarsh Priyadarshi/WebstormProjects/gep/src/vector/file.py')], contents=['\nFile: C:\\Users\\Utkarsh Priyadarshi\\WebstormProjects\\gep\\src\\vector\\file.py\nContent:\n    import asyncio\n    import logging\n    import time\n    import aiofiles\n    import hashlib\n    import pathlib\n    from dataclasses import dataclass, field\n    from .manager import EmbeddingManager\n    from src.watcher import AsyncFileWatcher, default_ignore_patterns, FileEvent, EventType\n    from langchain_text_splitters import RecursiveCharacterTextSplitter\n    logging.basicConfig(level=logging.INFO)\n    logger = logging.getLogger(__name__)\n    @dataclass\n    class FileMetadata:\n        file_path: str\n    last_modified: float\n    content_hash: str\n    vector_ids: list[str]\n    chunks: list[str]\n    is_processed: bool = False\n        @dataclass\n        class FileManagerConfig:\n        batch_size: int = 10\n            ignored_patterns: list[str] = f

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


4 6
AnalysisBatch(files=[WindowsPath('C:/Users/Utkarsh Priyadarshi/WebstormProjects/gep/src/vector/embedding.py'), WindowsPath('C:/Users/Utkarsh Priyadarshi/WebstormProjects/gep/src/vector/utils.py')], contents=['\nFile: C:\\Users\\Utkarsh Priyadarshi\\WebstormProjects\\gep\\src\\vector\\embedding.py\nContent:\n    from abc import ABC, abstractmethod\n    from typing import Optional\n    from enum import Enum, auto\n    import logging\n    import asyncio\n    from langchain_openai import OpenAIEmbeddings\n        from tenacity import retry, stop_after_attempt, wait_exponential\n        logging.basicConfig(level=logging.INFO)\n        logger = logging.getLogger(__name__)\n        class EmbeddingProviderType(Enum):\n    OPENAI = auto()\n    CLAUDE = auto()\n    class EmbeddingConfig:\n    def __init__(self, model_name: str, api_key: Optional[str] = None, batch_size: int = 100, **kwargs) -> None:\n        self.model_name = model_name\n    self.api_key = api_key\n    self.batch_size = batc

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Streamed 6 results and 4 batches


"### [FILE]\nFile Path: C:\\Users\\Utkarsh Priyadarshi\\WebstormProjects\\gep\\src\\vector\\vector.py\n\n#### Key Concepts:\n- **Object-Oriented Programming (OOP)**: The file employs OOP principles, particularly through the use of abstract base classes (`VectorStore`) and data classes (`EmbeddingVector`, `SearchResult`, `VectorStoreConfig`).\n- **Asynchronous Programming**: Utilizes `async` and `await` keywords for non-blocking operations, particularly for I/O-bound tasks like adding and querying vectors in storage.\n- **Enumeration**: Defines a `VectorStoreType` enum for various vector store implementations, enhancing code clarity and maintainability.\n\n#### Primary Abstractions and Data Structures:\n- **EmbeddingVector**: Represents a vector with an associated ID, metadata, and textual content.\n- **SearchResult**: Encapsulates the result of a vector query, including the text, metadata, score, and source file.\n- **VectorStore**: An abstract base class that defines the interface for

In [5]:
# Save the assembled answer (`final_result`, built in an earlier cell) as Markdown.
# The encoding is given explicitly: without it open() uses the platform's locale
# encoding, which on Windows can raise UnicodeEncodeError for characters in the
# model's response that the locale code page cannot represent.
with open("result.md", "w", encoding="utf-8") as f:
    f.write(final_result)

