In [2]:
%pip install qdrant-client langchain-qdrant qdrant-client pypdf sentence-transformers

Collecting langchain-qdrant
  Downloading langchain_qdrant-1.1.0-py3-none-any.whl.metadata (2.0 kB)
Downloading langchain_qdrant-1.1.0-py3-none-any.whl (24 kB)
Installing collected packages: langchain-qdrant
Successfully installed langchain-qdrant-1.1.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m26.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [6]:
from qdrant_client import QdrantClient
client = QdrantClient(url="http://localhost:6333")


In [7]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http import models
from langchain_community.embeddings import SentenceTransformerEmbeddings

# 1. Configuration (Matches your Docker Compose)
QDRANT_URL = "http://localhost:6333"
COLLECTION_NAME = "equity_research_reports"

def run_qdrant_pipeline(pdf_path):
    # 2. Load and Chunk PDF
    loader = PyPDFLoader(pdf_path)
    raw_docs = loader.load()
    
    # Using specific headers to simulate "Equity Framework" structure
    splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=80)
    chunks = splitter.split_documents(raw_docs)

    # 3. Initialize Embedding Model (Local MiniLM)
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

    # 4. Ingest into Qdrant
    # .from_documents handles collection creation and indexing automatically
    vector_store = QdrantVectorStore.from_documents(
        documents=chunks,
        embedding=embeddings,
        url=QDRANT_URL,
        collection_name=COLLECTION_NAME,
        force_recreate=False # Set to True if you want to wipe the DB and restart
    )
    
    print(f"Ingested {len(chunks)} chunks into Qdrant collection: {COLLECTION_NAME}")
    return vector_store

def query_qdrant(query_text):
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    
    # Initialize client for specific retrieval
    client = QdrantClient(url=QDRANT_URL)
    
    vector_store = QdrantVectorStore(
        client=client,
        collection_name=COLLECTION_NAME,
        embedding=embeddings, 
    )
    
    # 5. Semantic Search
    results = vector_store.similarity_search(query_text, k=3)
    return results

if __name__ == "__main__":
    PDF_FILE = "./data/2023-Equity-Derivatives-2023-Latham-Watkins.pdf"
    
    if os.path.exists(PDF_FILE):
        # Step 1: Data Ingestion
        run_qdrant_pipeline(PDF_FILE)
        
        # Step 2: Querying
        user_query = "What is the outlook for equity derivatives?"
        matches = query_qdrant(user_query)
        
        for i, doc in enumerate(matches):
            page_num = doc.metadata.get('page', 'Unknown')
            print(f"\n--- Result {i+1} (Page {page_num}) ---")
            print(doc.page_content[:300] + "...")
    else:
        print(f"Please place {PDF_FILE} in the project directory.")


Loading weights: 100%|██████████| 103/103 [00:00<00:00, 2996.06it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


Ingested 510 chunks into Qdrant collection: equity_research_reports


Loading weights: 100%|██████████| 103/103 [00:00<00:00, 2596.23it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m



--- Result 1 (Page 69) ---
accounting, cash flow, corporate or regulatory perspective.
Typical equity derivatives products that allow a shareholder to acquire a substantial position 
in a publicly traded equity or to monetise or hedge an existing equity position include the 
following:
• margin loans allow a borrower to finan...

--- Result 2 (Page 69) ---
accounting, cash flow, corporate or regulatory perspective.
Typical equity derivatives products that allow a shareholder to acquire a substantial position 
in a publicly traded equity or to monetise or hedge an existing equity position include the 
following:
• margin loans allow a borrower to finan...

--- Result 3 (Page 69) ---
accounting, cash flow, corporate or regulatory perspective.
Typical equity derivatives products that allow a shareholder to acquire a substantial position 
in a publicly traded equity or to monetise or hedge an existing equity position include the 
following:
• margin loans allow a borrower to finan...
