In [1]:
from sentence_transformers import SentenceTransformer
import numpy as np
from typing import List, Dict
import faiss
import pickle

In [2]:
class QueryEngine:
    """Semantic search over a FAISS index of text chunks.

    A query string is embedded with a SentenceTransformer model, the nearest
    entries of the FAISS index are looked up, and every hit is mapped back to
    its chunk record through ``chunk_map``.
    """

    def __init__(self, model_name: str = 'sentence-transformers/all-mpnet-base-v2'):
        """Initialize query engine with embedding model.

        Args:
            model_name: Model identifier handed to ``SentenceTransformer``.
                Presumably this must be the model the index was built with --
                TODO confirm against the indexing code.
        """
        self.model = SentenceTransformer(model_name)
        self.vector_store = None
        # Defined here so that calling query() before load_vector_store()
        # fails with a clear message instead of a missing-attribute error.
        self.chunk_map = None

    def load_vector_store(self, index_path: str, chunk_map_path: str):
        """Load the FAISS index and the pickled chunk mapping from disk.

        Args:
            index_path: File read with ``faiss.read_index``.
            chunk_map_path: Pickle file holding the chunk mapping.
        """
        # Read both artifacts before touching instance state, so a failure on
        # the second file cannot leave a new index paired with a stale map.
        index = faiss.read_index(index_path)

        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only load chunk maps produced by a trusted pipeline.
        with open(chunk_map_path, 'rb') as f:
            chunk_map = pickle.load(f)

        self.vector_store = index
        self.chunk_map = chunk_map

    def query(self, query_text: str, k: int = 5) -> List[Dict]:
        """Embed ``query_text`` and return up to ``k`` matching chunk records.

        Args:
            query_text: Natural-language query to embed.
            k: Number of neighbours requested from the index.

        Returns:
            Copies of the matching ``chunk_map`` records, in the order the
            index returned them, each with an added ``'distance'`` float.
            Fewer than ``k`` records are returned when the index cannot supply
            ``k`` valid neighbours.

        Raises:
            RuntimeError: If no vector store has been loaded yet.
        """
        if self.vector_store is None or self.chunk_map is None:
            raise RuntimeError(
                "Vector store not loaded; call load_vector_store() before query()."
            )

        # Generate query embedding as a contiguous float32 row of shape (1, dim),
        # which is the layout the index search call is given.
        query_vector = self.model.encode([query_text])[0]
        query_matrix = np.ascontiguousarray(query_vector, dtype='float32').reshape(1, -1)

        # Search vector store
        distances, indices = self.vector_store.search(query_matrix, k)

        # Format results
        chunk_count = len(self.chunk_map)
        results = []
        for distance, raw_idx in zip(distances[0], indices[0]):
            idx = int(raw_idx)
            # FAISS pads missing neighbours with label -1. Without the lower
            # bound, -1 would silently select the last chunk of a list-based
            # map (or raise KeyError on a dict-based one).
            if 0 <= idx < chunk_count:
                # Copy so the stored record is not mutated by the added key.
                result = self.chunk_map[idx].copy()
                result['distance'] = float(distance)
                results.append(result)

        return results

    def print_results(self, results: List[Dict]):
        """Print search results in a readable format.

        Args:
            results: Records as returned by ``query``; each must provide the
                keys ``'document'``, ``'text'`` and ``'distance'``.
        """
        print("\nSearch Results:")
        print("-" * 80)
        for i, result in enumerate(results, 1):
            print(f"\nResult {i}:")
            print(f"Document: {result['document']}")
            # Maps a distance to a score where smaller distance gives a larger
            # score. NOTE(review): assumes non-negative distances; a distance
            # of -1 would divide by zero -- confirm the index metric.
            print(f"Relevance Score: {1/(1 + result['distance']):.3f}")
            # Only the first 200 characters of the chunk text are shown.
            print(f"Text Snippet: {result['text'][:200]}...")
            print("-" * 80)



In [3]:
# Build the shared query engine used by the test-query cells below.
if __name__ == "__main__":
    # Constructing the engine loads the default embedding model.
    query_engine = QueryEngine()
    # Attach the saved FAISS index and its chunk mapping.
    query_engine.load_vector_store(
        index_path='../vector_store/faiss_index.idx',
        chunk_map_path='../vector_store/chunk_map.pkl',
    )

  

In [4]:
# Test Query 1: Risk Factors -- fetch the two closest chunks and print them.
print("\nTest Query 1: What are the risk factors for Tesla?")
results = query_engine.query(
    query_text="What are the risk factors for Tesla?",
    k=2,
)
query_engine.print_results(results=results)


Test Query 1: What are the risk factors for Tesla?

Search Results:
--------------------------------------------------------------------------------

Result 1:
Document: tsla-20231231-gen
Relevance Score: 0.594
Text Snippet: future. Any unfavorable ultimate outcome for Tesla may have a negative impact on the perception of Teslas treatment of our employees. Furthermore, we are directly or indirectly dependent upon companie...
--------------------------------------------------------------------------------

Result 2:
Document: tsla-20231231-gen
Relevance Score: 0.576
Text Snippet: 8-K, proxy statements and other information with the SEC. In addition, the SEC maintains a website www.sec.gov that contains reports, proxy and information statements, and other information regarding ...
--------------------------------------------------------------------------------


In [7]:
# Test Query 2: Revenue Comparison -- fetch the ten closest chunks and print them.
print("\nTest Query 2: Compare the revenue growth between Uber and Google")
results = query_engine.query(
    query_text="Compare the revenue growth between Uber and Google",
    k=10,
)
query_engine.print_results(results=results)


Test Query 2: Compare the revenue growth between Uber and Google

Search Results:
--------------------------------------------------------------------------------

Result 1:
Document: uber-10-k-2023
Relevance Score: 0.555
Text Snippet: 2022 Change Monthly Active Platform Consumers MAPCs 118 131 11 Trips 6,368 7,642 20 Gross Bookings 90,415 115,395 28 33 Revenue 17,455 31,877 83 90 Net loss attributable to Uber Technologies, Inc. 496...
--------------------------------------------------------------------------------

Result 2:
Document: uber-10-k-2023
Relevance Score: 0.553
Text Snippet: thousands, and per share amounts Year Ended December 31, 2020 2021 2022 Revenue 11,139 17,455 31,877 Costs and expenses Cost of revenue, exclusive of depreciation and amortization shown separately bel...
--------------------------------------------------------------------------------

Result 3:
Document: uber-10-k-2023
Relevance Score: 0.546
Text Snippet: fiscal years, beginning after December 15, 2022

In [8]:
# Test Query 3: Business Model -- fetch the ten closest chunks and print them.
print("\nTest Query 3: What are the main differences in business models between Tesla and Uber?")
results = query_engine.query(
    query_text="What are the main differences in business models between Tesla and Uber?",
    k=10,
)
query_engine.print_results(results=results)


Test Query 3: What are the main differences in business models between Tesla and Uber?

Search Results:
--------------------------------------------------------------------------------

Result 1:
Document: uber-10-k-2023
Relevance Score: 0.529
Text Snippet: from point A to point B.Uber develops and operates proprietary technology applications supporting a variety of offerings on its platform platforms or Platforms. Uber connectsconsumers Riders with inde...
--------------------------------------------------------------------------------

Result 2:
Document: uber-10-k-2023
Relevance Score: 0.528
Text Snippet: andCouriers, as well as contractors and consultants that support our global operations. In relation to those individuals who earn income on our platform, Uber is one of the largest open platforms for ...
--------------------------------------------------------------------------------

Result 3:
Document: tsla-20231231-gen
Relevance Score: 0.524
Text Snippet: Total stockholders equ